1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qs8-igemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM
// Full tile (m = mr = 1, n = nr = 2) with k = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
40
// Full tile with k = 4 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .cn_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
54
// Sweeps n over 1..2 and m over 1..1 at k = 4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
72
// Sweeps m over 1..1 with n = 2 and k = 4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(m).n(2).k(4)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
88
// Sweeps n over 1..2 with m = 1 and k = 4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(n).k(4)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
104
// Full tile with k swept over 1..3 (every value below 4).
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
119
// For k in 1..3, sweeps n over 1..2 and m over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
139
// Full tile with k swept over 5..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
154
// For k in 5..7, sweeps n over 1..2 and m over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
174
// Full tile with k swept over the multiples of 4 from 8 through 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
189
// For k in 8, 12, ..., 40, sweeps n over 1..2 and m over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
209
// n = 3 (the only value in [3, 4)) with m = 1, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
226
// n = 3 with m = 1 and cn_stride set to 5, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
244
// n = 3, k in 1, 6, 11, 16, and m swept over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
264
// n in 4, 6 (multiples of 2) with m = 1, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
281
// n in 4, 6 with m = 1 and cn_stride set to 5, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
299
// n in 4, 6, k in 1, 6, 11, 16, and m swept over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
319
// Full tile with ks set to 3, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
335
// ks = 3 with k in 1, 6, 11, 16, sweeping n over 1..2 and m over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
356
// n = 3 with m = 1 and ks set to 3, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
374
// n in 4, 6 with m = 1 and ks set to 3, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
392
// cm_stride = 5 with k in 1, 6, 11, 16, sweeping n over 1..2 and m over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
413
// Full tile with ks = 3 and a_offset = 23, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k)
      .ks(3)
      .a_offset(23)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
430
// Full tile with ks = 3 and a_offset = 23; zero_index takes each value in [0, 1), for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(2).k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
450
// Full tile with k = 4 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
464
// Full tile with k = 4 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
478
// Full tile with k = 4 and cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .cm_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
492 #endif // XNN_ARCH_ARM
493
494
495 #if XNN_ARCH_ARM
// Full tile (m = mr = 2, n = nr = 2) with k = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(4).sr(1)
    .m(2).n(2).k(4)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
508
// Full tile with k = 4 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(4).sr(1)
    .m(2).n(2).k(4)
    .cn_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
522
// Sweeps n over 1..2 and m over 1..2 at k = 4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
540
// Sweeps m over 1..2 with n = 2 and k = 4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(m).n(2).k(4)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
556
// Sweeps n over 1..2 with m = 2 and k = 4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; ++n) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(n).k(4)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
572
// Full tile with k swept over 1..3 (every value below 4).
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
587
// For k in 1..3, sweeps n over 1..2 and m over 1..2, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
607
// Full tile with k swept over 5..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
622
// For k in 5..7, sweeps n over 1..2 and m over 1..2, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
642
// Full tile with k swept over the multiples of 4 from 8 through 40.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
657
// For k in 8, 12, ..., 40, sweeps n over 1..2 and m over 1..2, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
677
// n = 3 (the only value in [3, 4)) with m = 2, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(n).k(k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
694
// n = 3 with m = 2 and cn_stride set to 5, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
712
// n = 3, k in 1, 6, 11, 16, and m swept over 1..2, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
732
// n in 4, 6 (multiples of 2) with m = 2, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(n).k(k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
749
// n in 4, 6 with m = 2 and cn_stride set to 5, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
767
// n in 4, 6, k in 1, 6, 11, 16, and m swept over 1..2, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
787
// Full tile with ks set to 3, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
803
// ks = 3 with k in 1, 6, 11, 16, sweeping n over 1..2 and m over 1..2, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
824
// n = 3 with m = 2 and ks set to 3, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
842
// n in 4, 6 with m = 2 and ks set to 3, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
860
// cm_stride = 5 with k in 1, 6, 11, 16, sweeping n over 1..2 and m over 1..2, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
881
// Full tile with ks = 3 and a_offset = 43, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(k)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
        xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
898
// Full tile with ks = 3 and a_offset = 43; zero_index takes each value in 0..1, for k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
          xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
918
// Full tile with k = 4 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(4).sr(1)
    .m(2).n(2).k(4)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
932
// Full tile with k = 4 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(4).sr(1)
    .m(2).n(2).k(4)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
946
// Full tile with k = 4 and cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(4).sr(1)
    .m(2).n(2).k(4)
    .cm_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
      xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
960 #endif // XNN_ARCH_ARM
961
962
963 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full tile (m = mr = 1, n = nr = 8) with k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
976
// Full tile with k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
990
// Sweeps n over 1..8 and m over 1..1 at k = 16, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1008
// Sweeps m over 1..1 with n = 8 and k = 16, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1024
// Sweeps n over 1..8 with m = 1 and k = 16, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1040
// Full tile with k swept over 1..15 (every value below 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1055
// For k in 1..15, sweeps n over 1..8 and m over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1075
// Full tile with k swept over 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1090
// For k in 17..31, sweeps n over 1..8 and m over 1..1, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1110
// Full tile with k swept over the multiples of 16 from 32 through 160.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1125
// Sweeps k over 32..160 in steps of 16 and n over 1..8 (the m loop only
// visits 1), calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1145
// Sweeps n over 9..15 and, for each n, k over 1, 18, 35, 52, 69 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1162
// Sweeps n over 9..15 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1180
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 (the m loop only visits 1),
// calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1200
// Runs n = 16 and n = 24 and, for each n, k over 1, 18, 35, 52, 69 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1217
// Runs n = 16 and n = 24 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1235
// Runs n = 16 and n = 24 with k over 1, 18, 35, 52, 69 (the m loop only
// visits 1), calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1255
// Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1271
// Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 (the m loop only visits 1)
// with ks set to 3, calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1292
// Sweeps n over 9..15 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1310
// Runs n = 16 and n = 24 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1328
// Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 (the m loop only visits 1)
// with cm_stride set to 11, calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1349
// Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8, ks set to 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.ks(3);
    tester.a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1366
// Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8, ks set to 3 and
// a_offset set to 83; the inner loop passes zero_index 0 (its only value).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(8).k(k_val);
      tester.ks(3);
      tester.a_offset(83);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1386
// Single run with m = 1, n = 8, k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1400
// Single run with m = 1, n = 8, k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1414
// Single run with m = 1, n = 8, k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1428 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1429
1430
1431 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 8, k = 16 and no further tester options.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1444
// Single run with m = 1, n = 8, k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1458
// Fixes k at 16 and sweeps n over 1..8 (the m loop only visits 1), calling
// iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1476
// Fixes n = 8 and k = 16 and loops over m (only the value 1), calling
// iterations(1) each time.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(m_val).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1492
// Fixes m = 1 and k = 16 and sweeps n over 1..8, calling iterations(1) each
// time.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(n_val).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1508
// Sweeps k over 1..15 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1523
// Sweeps k over 1..15 and n over 1..8 (the m loop only visits 1), calling
// iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1543
// Sweeps k over 17..31 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1558
// Sweeps k over 17..31 and n over 1..8 (the m loop only visits 1), calling
// iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1578
// Sweeps k over 32..160 in steps of 16 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1593
// Sweeps k over 32..160 in steps of 16 and n over 1..8 (the m loop only
// visits 1), calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1613
// Sweeps n over 9..15 and, for each n, k over 1, 18, 35, 52, 69 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1630
// Sweeps n over 9..15 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1648
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 (the m loop only visits 1),
// calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1668
// Runs n = 16 and n = 24 and, for each n, k over 1, 18, 35, 52, 69 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1685
// Runs n = 16 and n = 24 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1703
// Runs n = 16 and n = 24 with k over 1, 18, 35, 52, 69 (the m loop only
// visits 1), calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1723
// Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1739
// Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 (the m loop only visits 1)
// with ks set to 3, calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1760
// Sweeps n over 9..15 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1778
// Runs n = 16 and n = 24 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1796
// Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 (the m loop only visits 1)
// with cm_stride set to 11, calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1817
// Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8, ks set to 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.ks(3);
    tester.a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1834
// Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8, ks set to 3 and
// a_offset set to 83; the inner loop passes zero_index 0 (its only value).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(8).k(k_val);
      tester.ks(3);
      tester.a_offset(83);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1854
// Single run with m = 1, n = 8, k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1868
// Single run with m = 1, n = 8, k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1882
// Single run with m = 1, n = 8, k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1896 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1897
1898
1899 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 8, k = 16 and no further tester options.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1912
// Single run with m = 1, n = 8, k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1926
// Fixes k at 16 and sweeps n over 1..8 (the m loop only visits 1), calling
// iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1944
// Fixes n = 8 and k = 16 and loops over m (only the value 1), calling
// iterations(1) each time.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(m_val).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1960
// Fixes m = 1 and k = 16 and sweeps n over 1..8, calling iterations(1) each
// time.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(n_val).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1976
// Sweeps k over 1..15 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1991
// Sweeps k over 1..15 and n over 1..8 (the m loop only visits 1), calling
// iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2011
// Sweeps k over 17..31 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2026
// Sweeps k over 17..31 and n over 1..8 (the m loop only visits 1), calling
// iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2046
// Sweeps k over 32..160 in steps of 16 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2061
// Sweeps k over 32..160 in steps of 16 and n over 1..8 (the m loop only
// visits 1), calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2081
// Sweeps n over 9..15 and, for each n, k over 1, 18, 35, 52, 69 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2098
// Sweeps n over 9..15 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2116
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 (the m loop only visits 1),
// calling iterations(1) for every configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2136
// Runs n = 16 and n = 24 and, for each n, k over 1, 18, 35, 52, 69 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2153
// Runs n = 16 and n = 24 and, for each n, k over 1, 18, 35, 52, 69 with m = 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2171
// 1x8c2 NEONv8 MLAL LD1R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2191
// 1x8c2 NEONv8 MLAL LD1R: full 1x8 tile, k in {1, 18, 35, 52, 69}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2207
// 1x8c2 NEONv8 MLAL LD1R: k in {1, 18, 35, 52, 69}, n in [1, 8], m in [1, 1],
// ks = 3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2228
// 1x8c2 NEONv8 MLAL LD1R: n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2246
// 1x8c2 NEONv8 MLAL LD1R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2264
// 1x8c2 NEONv8 MLAL LD1R: k in {1, 18, 35, 52, 69}, n in [1, 8], m in [1, 1],
// cm_stride = 11, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2285
// 1x8c2 NEONv8 MLAL LD1R: full 1x8 tile, k in {1, 18, 35, 52, 69}, ks = 3,
// a_offset = 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2302
// 1x8c2 NEONv8 MLAL LD1R: full 1x8 tile, k in {1, 18, 35, 52, 69}, ks = 3,
// a_offset = 83, zero_index swept over [0, 0].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t mz_val = 0; mz_val < 1; ++mz_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(mz_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2322
// 1x8c2 NEONv8 MLAL LD1R: full 1x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2336
// 1x8c2 NEONv8 MLAL LD1R: full 1x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2350
// 1x8c2 NEONv8 MLAL LD1R: full 1x8 tile at k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2364 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2365
2366
2367 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c4 NEON MLAL LD1R: full 1x8 tile at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2380
// 1x8c4 NEON MLAL LD1R: full 1x8 tile at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2394
// 1x8c4 NEON MLAL LD1R: k = 16, n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2412
// 1x8c4 NEON MLAL LD1R: k = 16, n = 8, m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2428
// 1x8c4 NEON MLAL LD1R: k = 16, m = 1, n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2444
// 1x8c4 NEON MLAL LD1R: full 1x8 tile, k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2459
// 1x8c4 NEON MLAL LD1R: k in [1, 15], n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2479
// 1x8c4 NEON MLAL LD1R: full 1x8 tile, k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2494
// 1x8c4 NEON MLAL LD1R: k in [17, 31], n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2514
// 1x8c4 NEON MLAL LD1R: full 1x8 tile, k in {32, 48, ..., 160} (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2529
// 1x8c4 NEON MLAL LD1R: k in {32, 48, ..., 160}, n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2549
// 1x8c4 NEON MLAL LD1R: n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2566
// 1x8c4 NEON MLAL LD1R: n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1,
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2584
// 1x8c4 NEON MLAL LD1R: n in [9, 15], k in {1, 18, 35, 52, 69}, m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2604
// 1x8c4 NEON MLAL LD1R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2621
// 1x8c4 NEON MLAL LD1R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1,
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2639
// 1x8c4 NEON MLAL LD1R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2659
// 1x8c4 NEON MLAL LD1R: full 1x8 tile, k in {1, 18, 35, 52, 69}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2675
// 1x8c4 NEON MLAL LD1R: k in {1, 18, 35, 52, 69}, n in [1, 8], m in [1, 1],
// ks = 3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2696
// 1x8c4 NEON MLAL LD1R: n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2714
// 1x8c4 NEON MLAL LD1R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2732
// 1x8c4 NEON MLAL LD1R: k in {1, 18, 35, 52, 69}, n in [1, 8], m in [1, 1],
// cm_stride = 11, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2753
// 1x8c4 NEON MLAL LD1R: full 1x8 tile, k in {1, 18, 35, 52, 69}, ks = 3,
// a_offset = 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2770
// 1x8c4 NEON MLAL LD1R: full 1x8 tile, k in {1, 18, 35, 52, 69}, ks = 3,
// a_offset = 83, zero_index swept over [0, 0].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t mz_val = 0; mz_val < 1; ++mz_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(mz_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2790
// 1x8c4 NEON MLAL LD1R: full 1x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2804
// 1x8c4 NEON MLAL LD1R: full 1x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2818
// 1x8c4 NEON MLAL LD1R: full 1x8 tile at k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2832 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2833
2834
2835 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c4 NEONv8 MLAL LD2R: full 1x8 tile at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2848
// 1x8c4 NEONv8 MLAL LD2R: full 1x8 tile at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2862
// 1x8c4 NEONv8 MLAL LD2R: k = 16, n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2880
// 1x8c4 NEONv8 MLAL LD2R: k = 16, n = 8, m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2896
// 1x8c4 NEONv8 MLAL LD2R: k = 16, m = 1, n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2912
// 1x8c4 NEONv8 MLAL LD2R: full 1x8 tile, k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2927
// 1x8c4 NEONv8 MLAL LD2R: k in [1, 15], n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2947
// 1x8c4 NEONv8 MLAL LD2R: full 1x8 tile, k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2962
// 1x8c4 NEONv8 MLAL LD2R: k in [17, 31], n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2982
// 1x8c4 NEONv8 MLAL LD2R: full 1x8 tile, k in {32, 48, ..., 160} (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2997
// 1x8c4 NEONv8 MLAL LD2R: k in {32, 48, ..., 160}, n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3017
// 1x8c4 NEONv8 MLAL LD2R: n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3034
// 1x8c4 NEONv8 MLAL LD2R: n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1,
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3052
// 1x8c4 NEONv8 MLAL LD2R: n in [9, 15], k in {1, 18, 35, 52, 69}, m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3072
// 1x8c4 NEONv8 MLAL LD2R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3089
// 1x8c4 NEONv8 MLAL LD2R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1,
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3107
// 1x8c4 NEONv8 MLAL LD2R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3127
// 1x8c4 NEONv8 MLAL LD2R: full 1x8 tile, k in {1, 18, 35, 52, 69}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3143
// 1x8c4 NEONv8 MLAL LD2R: k in {1, 18, 35, 52, 69}, n in [1, 8], m in [1, 1],
// ks = 3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3164
// 1x8c4 NEONv8 MLAL LD2R: n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3182
// 1x8c4 NEONv8 MLAL LD2R: n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3200
// 1x8c4 NEONv8 MLAL LD2R: k in {1, 18, 35, 52, 69}, n in [1, 8], m in [1, 1],
// cm_stride = 11, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3221
// m = 1, n = 8 with ks(3) and a_offset(83), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
3238
// Same setup as the a_offset case (ks(3), a_offset(83)), additionally
// passing zero_index(mz) for mz = 0 only, at k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(8).k(k)
          .ks(3)
          .a_offset(83)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3258
// Single m = 1, n = 8, k = 16 run with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
3272
// Single m = 1, n = 8, k = 16 run with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
3286
// Single m = 1, n = 8, k = 16 run with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
3300 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3301
3302
3303 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline run: m = 2, n = 8, k = 16 with no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
3316
// m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
3330
// All (m, n) pairs with m in 1..2 and n in 1..8 at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3348
// Varies only m (1..2) with n = 8 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m).n(8).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3364
// Varies only n (1..8) with m = 2 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3380
// m = 2, n = 8 for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3395
// Every k in 1..15 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3415
// m = 2, n = 8 for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3430
// Every k in 17..31 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3450
// m = 2, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3465
// k = 32, 48, ..., 160 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3485
// n in 9..15 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3502
// n in 9..15 against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3520
// n in 9..15, k = 1, 18, 35, 52, 69 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3540
// n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3557
// n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3575
// n = 16 and n = 24, k = 1, 18, 35, 52, 69 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3595
// m = 2, n = 8 with ks(3), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3611
// k = 1, 18, 35, 52, 69 crossed with n in 1..8 and m in 1..2,
// using ks(3) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3632
// n in 9..15 against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3650
// n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3668
// k = 1, 18, 35, 52, 69 crossed with n in 1..8 and m in 1..2,
// using cm_stride(11) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3689
// m = 2, n = 8 with ks(3) and a_offset(163), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3706
// Same setup as the a_offset case (ks(3), a_offset(163)), additionally
// passing zero_index(mz) for mz = 0 and 1, at k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k)
          .ks(3)
          .a_offset(163)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3726
// Single m = 2, n = 8, k = 16 run with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
3740
// Single m = 2, n = 8, k = 16 run with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
3754
// Single m = 2, n = 8, k = 16 run with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
3768 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3769
3770
3771 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline run: m = 2, n = 8, k = 16 with no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
3784
// m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
3798
// All (m, n) pairs with m in 1..2 and n in 1..8 at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3816
// Varies only m (1..2) with n = 8 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m).n(8).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3832
// Varies only n (1..8) with m = 2 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3848
// m = 2, n = 8 for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3863
// Every k in 1..15 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3883
// m = 2, n = 8 for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3898
// Every k in 17..31 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3918
// m = 2, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
3933
// k = 32, 48, ..., 160 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
3953
// n in 9..15 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3970
// n in 9..15 against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
3988
// n in 9..15, k = 1, 18, 35, 52, 69 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
4008
// n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
4025
// n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
4043
// n = 16 and n = 24, k = 1, 18, 35, 52, 69 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
4063
// m = 2, n = 8 with ks(3), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
4079
// k = 1, 18, 35, 52, 69 crossed with n in 1..8 and m in 1..2,
// using ks(3) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
4100
// n in 9..15 against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
4118
// n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
4136
// k = 1, 18, 35, 52, 69 crossed with n in 1..8 and m in 1..2,
// using cm_stride(11) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
4157
// m = 2, n = 8 with ks(3) and a_offset(163), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
4174
// Same setup as the a_offset case (ks(3), a_offset(163)), additionally
// passing zero_index(mz) for mz = 0 and 1, at k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k)
          .ks(3)
          .a_offset(163)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
4194
// Single m = 2, n = 8, k = 16 run with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
4208
// Single m = 2, n = 8, k = 16 run with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
4222
// Single m = 2, n = 8, k = 16 run with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
4236 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4237
4238
4239 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline run: m = 2, n = 8, k = 16 with no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
4252
// m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
4266
// All (m, n) pairs with m in 1..2 and n in 1..8 at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
4284
// n=8, k=16; m runs 1..2; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4300
// m=2, k=16; n runs 1..8; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4316
// m=2, n=8; k runs 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4331
// k runs 1..15 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4351
// m=2, n=8; k runs 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4366
// k runs 17..31 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4386
// m=2, n=8; k takes 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4401
// k takes 32, 48, ..., 160 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4421
// m=2; n runs 9..15 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4438
// m=2, cn_stride(11); n runs 9..15 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4456
// n runs 9..15 (outer), k takes 1, 18, 35, 52, 69, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4476
// m=2; n takes 16 and 24 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4493
// m=2, cn_stride(11); n takes 16 and 24 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4511
// n takes 16 and 24 (outer), k takes 1, 18, 35, 52, 69, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4531
// m=2, n=8, ks(3); k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4547
// ks(3); k takes 1, 18, 35, 52, 69 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4568
// m=2, ks(3); n runs 9..15 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4586
// m=2, ks(3); n takes 16 and 24 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4604
// cm_stride(11); k takes 1, 18, 35, 52, 69 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4625
// m=2, n=8, ks(3), a_offset(163); k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4642
// m=2, n=8, ks(3), a_offset(163); k takes 1, 18, 35, 52, 69 (outer), zero_index runs 0..1 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4662
// Single run with m=2, n=8, k=16 and qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4676
// Single run with m=2, n=8, k=16 and qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4690
// Single run with m=2, n=8, k=16 and cm_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4704 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4705
4706
4707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=2, n=8, k=16 and no further tester options.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4720
// Single run with m=2, n=8, k=16 and cn_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4734
// k=16; n runs 1..8 (outer loop), m runs 1..2 (inner loop); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4752
// n=8, k=16; m runs 1..2; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4768
// m=2, k=16; n runs 1..8; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4784
// m=2, n=8; k runs 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4799
// k runs 1..15 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4819
// m=2, n=8; k runs 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4834
// k runs 17..31 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4854
// m=2, n=8; k takes 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4869
// k takes 32, 48, ..., 160 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4889
// m=2; n runs 9..15 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4906
// m=2, cn_stride(11); n runs 9..15 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4924
// n runs 9..15 (outer), k takes 1, 18, 35, 52, 69, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4944
// m=2; n takes 16 and 24 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4961
// m=2, cn_stride(11); n takes 16 and 24 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4979
// n takes 16 and 24 (outer), k takes 1, 18, 35, 52, 69, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4999
// m=2, n=8, ks(3); k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5015
// ks(3); k takes 1, 18, 35, 52, 69 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5036
// m=2, ks(3); n runs 9..15 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5054
// m=2, ks(3); n takes 16 and 24 (outer), k takes 1, 18, 35, 52, 69 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5072
// cm_stride(11); k takes 1, 18, 35, 52, 69 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5093
// m=2, n=8, ks(3), a_offset(163); k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5110
// m=2, n=8, ks(3), a_offset(163); k takes 1, 18, 35, 52, 69 (outer), zero_index runs 0..1 (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5130
// Single run with m=2, n=8, k=16 and qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5144
// Single run with m=2, n=8, k=16 and qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5158
// Single run with m=2, n=8, k=16 and cm_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5172 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5173
5174
5175 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=2, n=8, k=16 and no further tester options.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5188
// Single run with m=2, n=8, k=16 and cn_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5202
// k=16; n runs 1..8 (outer loop), m runs 1..2 (inner loop); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5220
// n=8, k=16; m runs 1..2; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5236
// m=2, k=16; n runs 1..8; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5252
// m=2, n=8; k runs 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5267
// k runs 1..15 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287
// m=2, n=8; k runs 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5302
// k runs 17..31 (outer), n runs 1..8, m runs 1..2 (inner); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5322
// Runs the tester for k = 32, 48, ..., 160 (step 16) with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(depth)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5337
// Sweeps k = 32, 48, ..., 160, n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5357
// Sweeps n in [9, 16) against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5374
// Sweeps n in [9, 16) against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5392
// Sweeps n in [9, 16), k = 1, 18, 35, 52, 69 and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5412
// Sweeps n = 16, 24 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5429
// Sweeps n = 16, 24 against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5447
// Sweeps n = 16, 24, k = 1, 18, 35, 52, 69 and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5467
// Runs the tester with ks(3) for k = 1, 18, 35, 52, 69, with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5483
// Sweeps k = 1, 18, 35, 52, 69, n in [1, 8] and m in [1, 2] with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5504
// Sweeps n in [9, 16) against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5522
// Sweeps n = 16, 24 against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5540
// Sweeps k = 1, 18, 35, 52, 69, n in [1, 8] and m in [1, 2] with cm_stride(11) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5561
// Runs the tester with ks(3) and a_offset(163) for k = 1, 18, 35, 52, 69, with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5578
// For k = 1, 18, 35, 52, 69, runs the tester with zero_index set to 0 and then 1,
// alongside ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5598
// Single run with m = 2, n = 8, k = 16 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5612
// Single run with m = 2, n = 8, k = 16 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5626
// Single run with m = 2, n = 8, k = 16 and cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5640 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5641
5642
5643 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 2, n = 8 and k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
5656
// Single run with m = 2, n = 8, k = 16 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
5670
// Sweeps n in [1, 8] and m in [1, 2] at k = 16, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5688
// Sweeps m in [1, 2] at n = 8 and k = 16, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(rows).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
5704
// Sweeps n in [1, 8] at m = 2 and k = 16, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cols).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
5720
// Runs the tester once per k in [1, 16) with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
5735
// Sweeps k in [1, 16), n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5755
// Runs the tester once per k in [17, 32) with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
5770
// Sweeps k in [17, 32), n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5790
// Runs the tester for k = 32, 48, ..., 160 (step 16) with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
5805
// Sweeps k = 32, 48, ..., 160, n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5825
// Sweeps n in [9, 16) against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5842
// Sweeps n in [9, 16) against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5860
// Sweeps n in [9, 16), k = 1, 18, 35, 52, 69 and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5880
// Sweeps n = 16, 24 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5897
// Sweeps n = 16, 24 against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5915
// Sweeps n = 16, 24, k = 1, 18, 35, 52, 69 and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5935
// Runs the tester with ks(3) for k = 1, 18, 35, 52, 69, with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
5951
// Sweeps k = 1, 18, 35, 52, 69, n in [1, 8] and m in [1, 2] with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5972
// Sweeps n in [9, 16) against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5990
// Sweeps n = 16, 24 against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6008
// Sweeps k = 1, 18, 35, 52, 69, n in [1, 8] and m in [1, 2] with cm_stride(11) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6029
// Runs the tester with ks(3) and a_offset(163) for k = 1, 18, 35, 52, 69, with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
6046
// For k = 1, 18, 35, 52, 69, runs the tester with zero_index set to 0 and then 1,
// alongside ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(8).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6066
// Single run with m = 2, n = 8, k = 16 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
6080
// Single run with m = 2, n = 8, k = 16 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
6094
// Single run with m = 2, n = 8, k = 16 and cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
6108 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6109
6110
6111 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 2, n = 8 and k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
6124
// Single run with m = 2, n = 8, k = 16 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
6138
// Sweeps n in [1, 8] and m in [1, 2] at k = 16, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6156
// Sweeps m in [1, 2] at n = 8 and k = 16, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(rows).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
6172
// Sweeps n in [1, 8] at m = 2 and k = 16, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cols).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
6188
// Runs the tester once per k in [1, 16) with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
6203
// Sweeps k in [1, 16), n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6223
// Runs the tester once per k in [17, 32) with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
6238
// Sweeps k in [17, 32), n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6258
// Runs the tester for k = 32, 48, ..., 160 (step 16) with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
6273
// Sweeps k = 32, 48, ..., 160, n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6293
// Sweeps n in [9, 16) against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6310
// Sweeps n in [9, 16) against k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6328
// Sweeps n in [9, 16), k = 1, 18, 35, 52, 69 and m in [1, 2], each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6348
// Sweeps n = 16, 24 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6365
// Sweeps n = 16, 24 and k = 1, 18, 35, 52, 69 at m = 2, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cols).k(kdim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6383
// Sweeps n = 16, 24, k = 1, 18, 35, 52, 69 and m = 1..2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6403
// Full 2x8 tile with ks = 3, swept over k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kdim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6419
// ks = 3 with every (m, n) in 1..2 x 1..8, swept over k = 1, 18, 35, 52, 69; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(kdim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6440
// ks = 3 with n = 9..15 and k = 1, 18, 35, 52, 69 at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cols).k(kdim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6458
// ks = 3 with n = 16, 24 and k = 1, 18, 35, 52, 69 at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cols).k(kdim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6476
// cm_stride = 11 with every (m, n) in 1..2 x 1..8, swept over k = 1, 18, 35, 52, 69; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(kdim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6497
// Full 2x8 tile with ks = 3 and a_offset = 163, swept over k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kdim)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6514
// Same setup as a_offset (ks = 3, a_offset = 163), additionally passing zero_index = 0 and 1
// for each k in 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kdim)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6534
// Single run at m = 2, n = 8, k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6548
// Single run at m = 2, n = 8, k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6562
// Single run at m = 2, n = 8, k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6576 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6577
6578
6579 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run at m = 1, n = 8, k = 16 with no further tester options set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6592
// Single run at m = 1, n = 8, k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6606
// k = 16 with n = 1..8; the m loop has a single iteration (m = 1). One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(rows).n(cols).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6624
// k = 16, n = 8, looping over m; with mr = 1 the loop runs only for m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(rows).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6640
// k = 16, m = 1, looping over n = 1..8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(cols).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6656
// Full 1x8 tile for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim < 16; ++kdim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6671
// Every k in 1..15 with n = 1..8; the m loop has a single iteration (m = 1). One iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim < 16; ++kdim) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6691
// Full 1x8 tile for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 17; kdim < 32; ++kdim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6706
// Every k in 17..31 with n = 1..8; the m loop has a single iteration (m = 1). One iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 17; kdim < 32; ++kdim) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6726
// Full 1x8 tile for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 32; kdim <= 160; kdim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6741
// k = 32, 48, ..., 160 with n = 1..8; the m loop has a single iteration (m = 1). One iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 32; kdim <= 160; kdim += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6761
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6778
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 1, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6796
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69; the m loop has a single iteration (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6816
// Sweeps n = 16, 24 (multiples of nr = 8) and k = 1, 18, 35, 52, 69 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6833
// Sweeps n = 16, 24 and k = 1, 18, 35, 52, 69 at m = 1, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6851
// Sweeps n = 16, 24 and k = 1, 18, 35, 52, 69; the m loop has a single iteration (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6871
// Full 1x8 tile with ks = 3, swept over k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6887
// ks = 3 with n = 1..8 over k = 1, 18, 35, 52, 69; the m loop has a single iteration (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6908
// ks = 3 with n = 9..15 and k = 1, 18, 35, 52, 69 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6926
// ks = 3 with n = 16, 24 and k = 1, 18, 35, 52, 69 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6944
// cm_stride = 11 with n = 1..8 over k = 1, 18, 35, 52, 69; the m loop has a single iteration (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6965
// Full 1x8 tile with ks = 3 and a_offset = 83, swept over k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6982
// Same setup as a_offset (ks = 3, a_offset = 83), additionally passing zero_index;
// the zero_index loop has a single iteration (0) for each k in 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kdim)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7002
// Single run at m = 1, n = 8, k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7016
// Single run at m = 1, n = 8, k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7030
// Single run at m = 1, n = 8, k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7044 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
7045
7046
7047 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run at m = 1, n = 8, k = 16 with no further tester options set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7060
// Single run at m = 1, n = 8, k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7074
// k = 16 with n = 1..8; the m loop has a single iteration (m = 1). One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(rows).n(cols).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7092
// k = 16, n = 8, looping over m; with mr = 1 the loop runs only for m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(rows).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7108
// k = 16, m = 1, looping over n = 1..8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(cols).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7124
// Full 1x8 tile for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim < 16; ++kdim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7139
// Every k in 1..15 with n = 1..8; the m loop has a single iteration (m = 1). One iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim < 16; ++kdim) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7159
// Full 1x8 tile for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 17; kdim < 32; ++kdim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7174
// Every k in 17..31 with n = 1..8; the m loop has a single iteration (m = 1). One iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 17; kdim < 32; ++kdim) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7194
// Full 1x8 tile for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 32; kdim <= 160; kdim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7209
// k = 32, 48, ..., 160 with n = 1..8; the m loop has a single iteration (m = 1). One iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 32; kdim <= 160; kdim += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7229
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7246
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 1, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7264
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69; the m loop has a single iteration (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7284
// Sweeps n = 16, 24 (multiples of nr = 8) and k = 1, 18, 35, 52, 69 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7301
// Sweeps n = 16, 24 and k = 1, 18, 35, 52, 69 at m = 1, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7319
// Sweeps n = 16, 24 and k = 1, 18, 35, 52, 69; the m loop has a single iteration (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7339
// Full 1x8 tile with ks = 3, swept over k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kdim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7355
// ks = 3 with n = 1..8 over k = 1, 18, 35, 52, 69; the m loop has a single iteration (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kdim = 1; kdim <= 80; kdim += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kdim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7376
// ks = 3 with n = 9..15 and k = 1, 18, 35, 52, 69 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kdim = 1; kdim <= 80; kdim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cols).k(kdim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7394
// ks = 3 with n = 16 and n = 24 (multiples of nr = 8), sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7412
// cm_stride = 11 with every sub-tile size (n in 1..8, m in 1..1), sweeping k
// over 1, 18, 35, 52, 69; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7433
// Full 1x8 tile with ks = 3 and a_offset = 83, sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7450
// Full 1x8 tile with ks = 3 and a_offset = 83, trying each zero_index below
// mr (only 0 here) while sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7470
// Full 1x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7484
// Full 1x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7498
// Full 1x8 tile at k = 16 with cm_stride = 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7512 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
7513
7514
7515 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 1x8 tile at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7528
// Full 1x8 tile at k = 16 with cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7542
// k = 16 with every sub-tile size (n in 1..8, m in 1..1); one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7560
// k = 16, n = 8, with m varied over 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7576
// k = 16, m = 1, with n varied over 1..8; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7592
// Full 1x8 tile with every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7607
// Every k in 1..15 combined with every sub-tile size (n in 1..8, m in 1..1);
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7627
// Full 1x8 tile with every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7642
// Every k in 17..31 combined with every sub-tile size (n in 1..8, m in 1..1);
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7662
// Full 1x8 tile with k stepping through the multiples of 16 from 32 to 160.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7677
// Multiples of 16 for k (32..160) combined with every sub-tile size
// (n in 1..8, m in 1..1); one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7697
// n in 9..15 (above nr = 8) with m = 1, sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7714
// n in 9..15 with cn_stride = 11 and m = 1, sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7732
// n in 9..15 with m varied over 1..1, sweeping k over 1, 18, 35, 52, 69;
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7752
// n = 16 and n = 24 (multiples of nr = 8) with m = 1, sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7769
// n = 16 and n = 24 with cn_stride = 11 and m = 1, sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7787
// n = 16 and n = 24 with m varied over 1..1, sweeping k over
// 1, 18, 35, 52, 69; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7807
// Full 1x8 tile with ks = 3, sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7823
// ks = 3 with every sub-tile size (n in 1..8, m in 1..1), sweeping k over
// 1, 18, 35, 52, 69; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7844
// ks = 3 with n in 9..15 (above nr = 8), sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7862
// ks = 3 with n = 16 and n = 24 (multiples of nr = 8), sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7880
// cm_stride = 11 with every sub-tile size (n in 1..8, m in 1..1), sweeping k
// over 1, 18, 35, 52, 69; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7901
// Full 1x8 tile with ks = 3 and a_offset = 83, sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7918
// Full 1x8 tile with ks = 3 and a_offset = 83, trying each zero_index below
// mr (only 0 here) while sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7938
// Full 1x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7952
// Full 1x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7966
// Full 1x8 tile at k = 16 with cm_stride = 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7980 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
7981
7982
7983 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 2x8 tile at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7996
// Full 2x8 tile at k = 16 with cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
8010
// k = 16 with every sub-tile size (n in 1..8, m in 1..2); one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8028
// k = 16, n = 8, with m varied over 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8044
// k = 16, m = 2, with n varied over 1..8; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8060
// Full 2x8 tile with every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8075
// Every k in 1..15 combined with every sub-tile size (n in 1..8, m in 1..2);
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8095
// Full 2x8 tile with every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8110
// Every k in 17..31 combined with every sub-tile size (n in 1..8, m in 1..2);
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8130
// Full 2x8 tile with k stepping through the multiples of 16 from 32 to 160.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8145
// Multiples of 16 for k (32..160) combined with every sub-tile size
// (n in 1..8, m in 1..2); one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8165
// n in 9..15 (above nr = 8) with m = 2, sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8182
// n in 9..15 with cn_stride = 11 and m = 2, sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8200
// n in 9..15 with m varied over 1..2, sweeping k over 1, 18, 35, 52, 69;
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8220
// n = 16 and n = 24 (multiples of nr = 8) with m = 2, sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8237
// n = 16 and n = 24 with cn_stride = 11 and m = 2, sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8255
// n = 16 and n = 24 with m varied over 1..2, sweeping k over
// 1, 18, 35, 52, 69; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8275
// Full 2x8 tile with ks = 3, sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8291
// ks = 3 with every sub-tile size (n in 1..8, m in 1..2), sweeping k over
// 1, 18, 35, 52, 69; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8312
// ks = 3 with n in 9..15 (above nr = 8) and m = 2, sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8330
// ks = 3 with n = 16 and n = 24 (multiples of nr = 8) and m = 2, sweeping k
// over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8348
// cm_stride = 11 with every sub-tile size (n in 1..8, m in 1..2), sweeping k
// over 1, 18, 35, 52, 69; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8369
// Full 2x8 tile with ks = 3 and a_offset = 163, sweeping k over
// 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8386
// Full 2x8 tile with ks = 3 and a_offset = 163, trying each zero_index below
// mr (0 and 1) while sweeping k over 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8406
// m = 2, n = 8, k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8420
// m = 2, n = 8, k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8434
// m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8448 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
8449
8450
8451 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Baseline case: m == mr (2), n == nr (8), k fixed at 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8464
// m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8478
// Every (m, n) with m in 1..2 and n in 1..8 at k = 16; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols < 9; ++cols) {
    for (uint32_t rows = 1; rows < 3; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(rows).n(cols).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8496
// m in 1..2 with n = 8 and k = 16; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows < 3; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(rows).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8512
// n in 1..8 with m = 2 and k = 16; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols < 9; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(cols).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8528
// m = 2, n = 8 for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8543
// Every (m, n) with m in 1..2 and n in 1..8, for every k in 1..15;
// one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8563
// m = 2, n = 8 for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth <= 31; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8578
// Every (m, n) with m in 1..2 and n in 1..8, for every k in 17..31;
// one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth <= 31; ++depth) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8598
// m = 2, n = 8 for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth < 161; depth += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8613
// Every (m, n) with m in 1..2 and n in 1..8, for k = 32, 48, ..., 160;
// one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth < 161; depth += 16) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8633
// n in 9..15 with m = 2; k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8650
// n in 9..15 with m = 2 and cn_stride(11); k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8668
// n in 9..15 combined with m in 1..2; k is sampled at 1, 18, 35, 52, 69;
// one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8688
// n takes the values 16 and 24 with m = 2; k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols < 32; cols += 8) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8705
// n takes the values 16 and 24 with m = 2 and cn_stride(11);
// k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols < 32; cols += 8) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8723
// n takes the values 16 and 24, combined with m in 1..2;
// k is sampled at 1, 18, 35, 52, 69; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols < 32; cols += 8) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8743
// m = 2, n = 8 with ks(3); k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8759
// Every (m, n) with m in 1..2 and n in 1..8, using ks(3);
// k is sampled at 1, 18, 35, 52, 69; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8780
// n in 9..15 with m = 2 and ks(3); k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8798
// n takes the values 16 and 24 with m = 2 and ks(3);
// k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols < 32; cols += 8) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8816
// Every (m, n) with m in 1..2 and n in 1..8, using cm_stride(11);
// k is sampled at 1, 18, 35, 52, 69; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8837
// m = 2, n = 8 with ks(3) and a_offset(163); k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8854
// Same setup as the a_offset case, additionally passing zero_index 0 and 1;
// k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    for (uint32_t zero_row = 0; zero_row <= 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8874
// m = 2, n = 8, k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8888
// m = 2, n = 8, k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8902
// m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8916 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
8917
8918
8919 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Baseline case: m == mr (2), n == nr (8), k fixed at 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8932
// m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8946
// Every (m, n) with m in 1..2 and n in 1..8 at k = 16; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols < 9; ++cols) {
    for (uint32_t rows = 1; rows < 3; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(rows).n(cols).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8964
// m in 1..2 with n = 8 and k = 16; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows < 3; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(rows).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8980
// n in 1..8 with m = 2 and k = 16; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols < 9; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(cols).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8996
// m = 2, n = 8 for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9011
// Every (m, n) with m in 1..2 and n in 1..8, for every k in 1..15;
// one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9031
// m = 2, n = 8 for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth <= 31; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9046
// Every (m, n) with m in 1..2 and n in 1..8, for every k in 17..31;
// one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth <= 31; ++depth) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9066
// m = 2, n = 8 for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth < 161; depth += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9081
// Every (m, n) with m in 1..2 and n in 1..8, for k = 32, 48, ..., 160;
// one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth < 161; depth += 16) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9101
// n in 9..15 with m = 2; k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9118
// n in 9..15 with m = 2 and cn_stride(11); k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9136
// n in 9..15 combined with m in 1..2; k is sampled at 1, 18, 35, 52, 69;
// one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9156
// n takes the values 16 and 24 with m = 2; k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols < 32; cols += 8) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9173
// n takes the values 16 and 24 with m = 2 and cn_stride(11);
// k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols < 32; cols += 8) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9191
// n takes the values 16 and 24, combined with m in 1..2;
// k is sampled at 1, 18, 35, 52, 69; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols < 32; cols += 8) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9211
// m = 2, n = 8 with ks(3); k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9227
// Every (m, n) with m in 1..2 and n in 1..8, using ks(3);
// k is sampled at 1, 18, 35, 52, 69; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9248
// n in 9..15 with m = 2 and ks(3); k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9266
// n takes the values 16 and 24 with m = 2 and ks(3);
// k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols < 32; cols += 8) {
    for (size_t depth = 1; depth < 81; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9284
// Every (m, n) with m in 1..2 and n in 1..8, using cm_stride(11);
// k is sampled at 1, 18, 35, 52, 69; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9305
// m = 2, n = 8 with ks(3) and a_offset(163); k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9322
// Same setup as the a_offset case, additionally passing zero_index 0 and 1;
// k is sampled at 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 81; depth += 17) {
    for (uint32_t zero_row = 0; zero_row <= 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9342
// m = 2, n = 8, k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9356
// m = 2, n = 8, k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9370
// m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9384 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9385
9386
9387 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Baseline case: full 2x8 tile with k fixed at 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9400
// Full 2x8 tile at k=16 with cn_stride=11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9414
// k=16 over every (n, m) with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9432
// k=16, n=8, with m swept over 1..2; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9448
// k=16, m=2, with n swept over 1..8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9464
// Full 2x8 tile for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9479
// Every k in 1..15 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9499
// Full 2x8 tile for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9514
// Every k in 17..31 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9534
// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9549
// k = 32, 48, ..., 160 crossed with n in 1..8 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9569
// m=2 with n in 9..15 (wider than nr=8), each against k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9586
// Same sweep as n in 9..15 by k in {1, 18, 35, 52, 69}, with cn_stride=11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9604
// n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..2; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9624
// m=2 with n in {16, 24} (multiples of nr=8), each against k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9641
// n in {16, 24} by k in {1, 18, 35, 52, 69}, with cn_stride=11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9659
// n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..2; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9679
// Full 2x8 tile with ks=3 for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9695
// ks=3 with k in {1, 18, 35, 52, 69}, n in 1..8 and m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9716
// ks=3 with m=2, n in 9..15 and k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9734
// ks=3 with m=2, n in {16, 24} and k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9752
// cm_stride=11 with k in {1, 18, 35, 52, 69}, n in 1..8 and m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9773
// Full 2x8 tile with ks=3 and a_offset=163 for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9790
// Full 2x8 tile with ks=3 and a_offset=163: for each k in {1, 18, 35, 52, 69}
// the tester is run once per zero_index value 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9810
// Full 2x8 tile at k=16 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9824
// Full 2x8 tile at k=16 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9838
// Full 2x8 tile at k=16 with cm_stride=11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9852 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9853
9854
9855 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Baseline case: full 4x16 tile with k fixed at 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9868
// Full 4x16 tile at k=8 with cn_stride=19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9882
// k=8 over every (n, m) with n in 1..16 and m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9900
// k=8, n=16, with m swept over 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9916
// k=8, m=4, with n swept over 1..16; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9932
// Full 4x16 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9947
// Every k in 1..7 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9967
// Full 4x16 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9982
// Every k in 9..15 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10002
// Full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10017
// k = 16, 24, ..., 80 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10037
// m=4 with n in 17..31 (wider than nr=16), each against k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10054
// n in 17..31 by k in {1, 10, 19, 28, 37}, with cn_stride=19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10072
// n in 17..31, k in {1, 10, 19, 28, 37}, m in 1..4; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10092
// m=4 with n in {32, 48} (multiples of nr=16), each against k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10109
// n in {32, 48} by k in {1, 10, 19, 28, 37}, with cn_stride=19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10127
// n in {32, 48}, k in {1, 10, 19, 28, 37}, m in 1..4; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10147
// Full 4x16 tile with ks=3 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10163
// ks=3 with k in {1, 10, 19, 28, 37}, n in 1..16 and m in 1..4; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10184
// ks=3 with m=4, n in 17..31 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10202
// ks=3 with m=4, n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10220
// cm_stride=19 with k in {1, 10, 19, 28, 37}, n in 1..16 and m in 1..4; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10241
// Full 4x16 tile with ks=3 and a_offset=163 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10258
// Full 4x16 tile with ks=3 and a_offset=163: for each k in {1, 10, 19, 28, 37}
// the tester is run once per zero_index value 0 through 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10278
// Full 4x16 tile at k=8 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10292
// Full 4x16 tile at k=8 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10306
// Full 4x16 tile at k=8 with cm_stride=19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10320 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
10321
10322
10323 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Baseline case: full 4x16 tile with k fixed at 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10336
// Full 4x16 tile (m = 4, n = 16) at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
10350
// Every (m, n) with m in [1, 4] and n in [1, 16] at k = 8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10368
// m from 1 to 4 with n fixed at 16 and k = 8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m).n(16).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10384
// n from 1 to 16 with m fixed at 4 and k = 8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10400
// Full 4x16 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10415
// Every (m, n) with m in [1, 4] and n in [1, 16] for each k in [1, 7], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10435
// Full 4x16 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10450
// Every (m, n) with m in [1, 4] and n in [1, 16] for each k in [9, 15], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470
// Full 4x16 tile for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10485
// Every (m, n) with m in [1, 4] and n in [1, 16] for k = 16, 24, ..., 80, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10505
// m = 4 with n in [17, 31] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10522
// m = 4 with n in [17, 31], k in {1, 10, 19, 28, 37} and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10540
// m in [1, 4] for each n in [17, 31] and k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10560
// m = 4 with n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10577
// m = 4 with n in {32, 48}, k in {1, 10, 19, 28, 37} and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10595
// m in [1, 4] for each n in {32, 48} and k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10615
// Full 4x16 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10631
// Every (m, n) with m in [1, 4] and n in [1, 16], ks set to 3, k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10652
// m = 4 with n in [17, 31], k in {1, 10, 19, 28, 37} and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10670
// m = 4 with n in {32, 48}, k in {1, 10, 19, 28, 37} and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10688
// Every (m, n) with m in [1, 4] and n in [1, 16], cm_stride set to 19, k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10709
// Full 4x16 tile with ks set to 3 and a_offset set to 163 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10726
// Full 4x16 tile with ks = 3 and a_offset = 163, sweeping zero_index over [0, 3] for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(16).k(k)
          .ks(3)
          .a_offset(163)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10746
// Full 4x16 tile (m = 4, n = 16) at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
10760
// Full 4x16 tile (m = 4, n = 16) at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
10774
// Full 4x16 tile (m = 4, n = 16) at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
10788 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
10789
10790
10791 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 4x16 tile (m = 4, n = 16) at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
10804
// Full 4x16 tile (m = 4, n = 16) at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
10818
// Every (m, n) with m in [1, 4] and n in [1, 16] at k = 8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10836
// m from 1 to 4 with n fixed at 16 and k = 8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m).n(16).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10852
// n from 1 to 16 with m fixed at 4 and k = 8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10868
// Full 4x16 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10883
// Every (m, n) with m in [1, 4] and n in [1, 16] for each k in [1, 7], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10903
// Full 4x16 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10918
// Every (m, n) with m in [1, 4] and n in [1, 16] for each k in [9, 15], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10938
// Full 4x16 tile for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
10953
// Every (m, n) with m in [1, 4] and n in [1, 16] for k = 16, 24, ..., 80, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10973
// m = 4 with n in [17, 31] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10990
// m = 4 with n in [17, 31], k in {1, 10, 19, 28, 37} and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11008
// m in [1, 4] for each n in [17, 31] and k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11028
// m = 4 with n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11045
// m = 4 with n in {32, 48}, k in {1, 10, 19, 28, 37} and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11063
// m in [1, 4] for each n in {32, 48} and k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11083
// Full 4x16 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11099
// Every (m, n) with m in [1, 4] and n in [1, 16], ks set to 3, k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11120
// m = 4 with n in [17, 31], k in {1, 10, 19, 28, 37} and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11138
// m = 4 with n in {32, 48}, k in {1, 10, 19, 28, 37} and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11156
// Every (m, n) with m in [1, 4] and n in [1, 16], cm_stride set to 19, k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11177
// Full 4x16 tile with ks set to 3 and a_offset set to 163 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11194
// Full 4x16 tile with ks = 3 and a_offset = 163, sweeping zero_index over [0, 3] for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(16).k(k)
          .ks(3)
          .a_offset(163)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11214
// Full 4x16 tile (m = 4, n = 16) at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11228
// Full 4x16 tile (m = 4, n = 16) at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11242
// Full 4x16 tile (m = 4, n = 16) at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11256 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
11257
11258
11259 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 4x16 tile (m = 4, n = 16) at k = 8, with kr set to 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11272
// Full 4x16 tile (m = 4, n = 16) at k = 8 with kr set to 4 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11286
// Sweeps every (m, n) sub-tile of the 4x16 tile at k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11304
// Varies m over 1..4 with n fixed at 16 and k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11320
// Varies n over 1..16 with m fixed at 4 and k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11336
// Full 4x16 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11351
// Every (m, n) sub-tile of the 4x16 tile for every k in 1..7, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11371
// Full 4x16 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11386
// Every (m, n) sub-tile of the 4x16 tile for every k in 9..15, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11406
// Full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11421
// Every (m, n) sub-tile of the 4x16 tile for k = 16, 24, ..., 80, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11441
// n in 17..31 (beyond nr = 16) with m = 4, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11458
// n in 17..31 with m = 4 and cn_stride(19), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11476
// n in 17..31 crossed with m in 1..4, for k = 1, 10, 19, 28, 37; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11496
// n = 32 and 48 (multiples of nr = 16) with m = 4, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11513
// n = 32 and 48 with m = 4 and cn_stride(19), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11531
// n = 32 and 48 crossed with m in 1..4, for k = 1, 10, 19, 28, 37; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11551
// Full 4x16 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11567
// Every (m, n) sub-tile of the 4x16 tile with ks(3), for k = 1, 10, 19, 28, 37; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11588
// n in 17..31 with m = 4 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11606
// n = 32 and 48 with m = 4 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11624
// Every (m, n) sub-tile of the 4x16 tile with cm_stride(19), for k = 1, 10, 19, 28, 37; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11645
// Full 4x16 tile with ks(3) and a_offset(163), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11662
// Full 4x16 tile with ks(3) and a_offset(163), sweeping zero_index over 0..3 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11682
// Full 4x16 tile at k = 8 with qmin(128) applied to the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11696
// Full 4x16 tile at k = 8 with qmax(128) applied to the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11710
// Full 4x16 tile at k = 8 with cm_stride(19), i.e. a stride larger than nr = 16.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11724 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
11725
11726
11727 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Baseline case: full 4x16 tile with k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11740
// Full 4x16 tile at k = 16 with cn_stride(19), i.e. a stride larger than nr = 16.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11754
// Sweeps every (m, n) sub-tile of the 4x16 tile at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11772
// Varies m over 1..4 with n fixed at 16 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(mc).n(16).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11788
// Varies n over 1..16 with m fixed at 4 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11804
// Full 4x16 tile for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11819
// Every (m, n) sub-tile of the 4x16 tile for every k in 1..15, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11839
// Full 4x16 tile for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11854
// Every (m, n) sub-tile of the 4x16 tile for every k in 17..31, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11874
// Full 4x16 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11889
// Every (m, n) sub-tile of the 4x16 tile for k = 32, 48, ..., 160, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11909
// n in 17..31 (beyond nr = 16) with m = 4, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11926
// n in 17..31 with m = 4 and cn_stride(19), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11944
// n in 17..31 crossed with m in 1..4, for k = 1, 18, 35, 52, 69; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11964
// n = 32 and 48 (multiples of nr = 16) with m = 4, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11981
// n = 32 and 48 with m = 4 and cn_stride(19), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11999
// n = 32 and 48 crossed with m in 1..4, for k = 1, 18, 35, 52, 69; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12019
// Full 4x16 tile with ks(3), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12035
// Every (m, n) sub-tile of the 4x16 tile with ks(3), for k = 1, 18, 35, 52, 69; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12056
// n in 17..31 with m = 4 and ks(3), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12074
// n = 32 and 48 with m = 4 and ks(3), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12092
// Every (m, n) sub-tile of the 4x16 tile with cm_stride(19), for k = 1, 18, 35, 52, 69; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12113
// Full 4x16 tile with ks(3) and a_offset(331), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(331)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12130
// Full 4x16 tile with ks(3) and a_offset(331), sweeping zero_index over 0..3 for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(331)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12150
// Full 4x16 tile at k = 16 with qmin(128) applied to the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12164
// Full 4x16 tile at k = 16 with qmax(128) applied to the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12178
// Full 4x16 tile at k = 16 with cm_stride(19), i.e. a stride larger than nr = 16.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12192 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
12193
12194
12195 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case: full 1x8 tile with k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12208
// Full 1x8 tile at k = 16 with cn_stride(11), i.e. a stride larger than nr = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12222
// Sweeps every (m, n) sub-tile of the 1x8 tile at k = 16, one iteration each.
// With mr = 1 the m loop runs exactly once.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12240
// Varies m over 1..1 (a single value, since mr = 1) with n fixed at 8 and k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12256
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, k=16; sweeps n over 1..8, one iteration each.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(n_size).k(16)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12272
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=8; sweeps k over 1..15.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12287
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over 1..15, n over 1..8 and m over 1..1, one iteration each.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12307
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=8; sweeps k over 17..31.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12322
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over 17..31, n over 1..8 and m over 1..1, one iteration each.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12342
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=8; sweeps k over 32..160 in steps of 16.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12357
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over 32..160 (step 16), n over 1..8 and m over 1..1, one iteration each.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12377
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1; sweeps n over 9..15 and k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12394
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, cn_stride=11; sweeps n over 9..15 and k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12412
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps n over 9..15, k over 1..80 (step 17) and m over 1..1, one iteration each.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12432
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1; sweeps n over {16, 24} and k over 1..80 in steps of 17.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12449
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, cn_stride=11; sweeps n over {16, 24} and k over 1..80 in steps of 17.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12467
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps n over {16, 24}, k over 1..80 (step 17) and m over 1..1, one iteration each.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12487
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=8, ks=3; sweeps k over 1..80 in steps of 17.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12503
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks=3; sweeps k over 1..80 (step 17), n over 1..8 and m over 1..1, one iteration each.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12524
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, ks=3; sweeps n over 9..15 and k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12542
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, ks=3; sweeps n over {16, 24} and k over 1..80 in steps of 17.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12560
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride=11; sweeps k over 1..80 (step 17), n over 1..8 and m over 1..1, one iteration each.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12581
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=8, ks=3, a_offset=83; sweeps k over 1..80 in steps of 17.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12598
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=8, ks=3, a_offset=83; sweeps k over 1..80 (step 17) and zero_index over 0..0.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12618
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m=1, n=8, k=16 and qmin set to 128.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
12632
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m=1, n=8, k=16 and qmax set to 128.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
12646
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m=1, n=8, k=16 and cm_stride set to 11.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
12660 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12661
12662
12663 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m=1, n=16, k=8.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
12676
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m=1, n=16, k=8 and cn_stride set to 19.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
12690
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k fixed at 8; sweeps n over 1..16 and m over 1..1, one iteration each.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12708
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n=16, k=8; sweeps m over 1..1 (a single value), one iteration each.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12724
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, k=8; sweeps n over 1..16, one iteration each.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12740
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=16; sweeps k over 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12755
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over 1..7, n over 1..16 and m over 1..1, one iteration each.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12775
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=16; sweeps k over 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12790
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over 9..15, n over 1..16 and m over 1..1, one iteration each.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12810
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=16; sweeps k over 16..80 in steps of 8.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12825
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over 16..80 (step 8), n over 1..16 and m over 1..1, one iteration each.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12845
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m=1; sweeps n over 17..31 and k over 1..40 in steps of 9.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12862
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, cn_stride=19; sweeps n over 17..31 and k over 1..40 in steps of 9.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(19);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12880
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps n over 17..31, k over 1..40 (step 9) and m over 1..1, one iteration each.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12900
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // m=1; sweeps n over {32, 48} and k over 1..40 in steps of 9.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12917
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, cn_stride=19; sweeps n over {32, 48} and k over 1..40 in steps of 9.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(19);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12935
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps n over {32, 48}, k over 1..40 (step 9) and m over 1..1, one iteration each.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12955
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=16, ks=3; sweeps k over 1..40 in steps of 9.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size)
      .ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12971
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks=3; sweeps k over 1..40 (step 9), n over 1..16 and m over 1..1, one iteration each.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12992
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, ks=3; sweeps n over 17..31 and k over 1..40 in steps of 9.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13010
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, ks=3; sweeps n over {32, 48} and k over 1..40 in steps of 9.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13028
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride=19; sweeps k over 1..40 (step 9), n over 1..16 and m over 1..1, one iteration each.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
13049
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=16, ks=3, a_offset=43; sweeps k over 1..40 in steps of 9.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size)
      .ks(3)
      .a_offset(43);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
13066
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=16, ks=3, a_offset=43; sweeps k over 1..40 (step 9) and zero_index over 0..0.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13086
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m=1, n=16, k=8 and qmin set to 128.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
13100
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m=1, n=16, k=8 and qmax set to 128.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
13114
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m=1, n=16, k=8 and cm_stride set to 19.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
13128 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13129
13130
13131 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with m=1, n=16, k=8.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
    xnn_init_qs8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
13144
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with m=1, n=16, k=8 and cn_stride set to 19.
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
    xnn_init_qs8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
13158
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k fixed at 8; sweeps n over 1..16 and m over 1..1, one iteration each.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13176
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n=16, k=8; sweeps m over 1..1 (a single value), one iteration each.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
13192
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m=1, k=8; sweeps n over 1..16, one iteration each.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
13208
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m=1, n=16; sweeps k over 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
13223
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps k over 1..7, n over 1..16 and m over 1..1, one iteration each.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
13243
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m=1, n=16; sweeps k over 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
13258
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps k over 9..15, n over 1..16 and m over 1..1, one iteration each.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
13278
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m=1, n=16; sweeps k over 16..80 in steps of 8.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
13293
// Runs every (m, n) sub-tile of the 1x16 tile for k = 16, 24, ..., 80, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13313
// Runs n in [17, 32) with m = 1, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13330
// Runs n in [17, 32) with m = 1 and cn_stride = 19, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13348
// Runs n in [17, 32) and k = 1, 10, ..., 37 for every m in [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13368
// Runs n = 32 and n = 48 with m = 1, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13385
// Runs n = 32 and n = 48 with m = 1 and cn_stride = 19, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13403
// Runs n = 32 and n = 48 and k = 1, 10, ..., 37 for every m in [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13423
// Runs the full 1x16 tile with ks = 3 for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(depth)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13439
// Runs every (m, n) sub-tile of the 1x16 tile with ks = 3 for k = 1, 10, ..., 37, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13460
// Runs n in [17, 32) with m = 1 and ks = 3, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13478
// Runs n = 32 and n = 48 with m = 1 and ks = 3, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13496
// Runs every (m, n) sub-tile of the 1x16 tile with cm_stride = 19 for k = 1, 10, ..., 37, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13517
// Runs the full 1x16 tile with ks = 3 and a_offset = 43 for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13534
// Runs the full 1x16 tile with ks = 3 and a_offset = 43 for k = 1, 10, ..., 37,
// setting zero_index to each value in [0, 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13554
// Runs a single full 1x16 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13568
// Runs a single full 1x16 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13582
// Runs a single full 1x16 tile at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13596 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13597
13598
13599 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs a single full 2x8 tile at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13612
// Runs a single full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13626
// Runs every (m, n) sub-tile of the 2x8 tile at k = 16, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(rows).n(cols).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13644
// Runs m in [1, 2] with n = 8 and k = 16, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(rows).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13660
// Runs n in [1, 8] with m = 2 and k = 16, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(cols).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13676
// Runs the full 2x8 tile for each k in [1, 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13691
// Runs every (m, n) sub-tile of the 2x8 tile for each k in [1, 16), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13711
// Runs the full 2x8 tile for each k in [17, 32).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13726
// Runs every (m, n) sub-tile of the 2x8 tile for each k in [17, 32), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13746
// Runs the full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13761
// Runs every (m, n) sub-tile of the 2x8 tile for k = 32, 48, ..., 160, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13781
// Runs n in [9, 16) with m = 2, sweeping k = 1, 18, ..., 69 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13798
// Runs n in [9, 16) with m = 2 and cn_stride = 11, sweeping k = 1, 18, ..., 69 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13816
// Runs n in [9, 16) and k = 1, 18, ..., 69 for every m in [1, 2], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13836
// Runs n = 16 and n = 24 with m = 2, sweeping k = 1, 18, ..., 69 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13853
// Runs n = 16 and n = 24 with m = 2 and cn_stride = 11, sweeping k = 1, 18, ..., 69 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13871
// Runs n = 16 and n = 24 and k = 1, 18, ..., 69 for every m in [1, 2], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13891
// Runs the full 2x8 tile with ks = 3 for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13907
// Runs every (m, n) sub-tile of the 2x8 tile with ks = 3 for k = 1, 18, ..., 69, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13928
// Runs n in [9, 16) with m = 2 and ks = 3, sweeping k = 1, 18, ..., 69 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13946
// Runs n = 16 and n = 24 with m = 2 and ks = 3, sweeping k = 1, 18, ..., 69 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13964
// Runs every (m, n) sub-tile of the 2x8 tile with cm_stride = 11 for k = 1, 18, ..., 69, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13985
// Runs the full 2x8 tile with ks = 3 and a_offset = 163 for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14002
// Runs the full 2x8 tile with ks = 3 and a_offset = 163 for k = 1, 18, ..., 69,
// setting zero_index to each value in [0, 2).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14022
// Runs a single full 2x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14036
// Runs a single full 2x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14050
// Runs a single full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14064 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14065
14066
14067 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs a single full 4x16 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14080
// Runs a single full 4x16 tile at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14094
// Runs every (m, n) sub-tile of the 4x16 tile at k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14112
// Runs m in [1, 4] with n = 16 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(rows).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14128
// Runs n in [1, 16] with m = 4 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14144
// Runs the full 4x16 tile for each k in [1, 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14159
// Runs every (m, n) sub-tile of the 4x16 tile for each k in [1, 8), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14179
// Runs the full 4x16 tile for each k in [9, 16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14194
// Runs every (m, n) sub-tile of the 4x16 tile for each k in [9, 16), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14214
// Runs the full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14229
// Runs every (m, n) sub-tile of the 4x16 tile for k = 16, 24, ..., 80, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14249
// Runs n in [17, 32) with m = 4, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14266
// Runs n in [17, 32) with m = 4 and cn_stride = 19, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14284
// Runs n in [17, 32) and k = 1, 10, ..., 37 for every m in [1, 4], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14304
// Runs n = 32 and n = 48 with m = 4, sweeping k = 1, 10, ..., 37 for each n.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14321
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n takes 32 and 48; k takes 1, 10, 19, 28, 37; cn_stride is fixed at 19.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14339
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One iteration per case: n in {32, 48}, k in {1, 10, 19, 28, 37}, m in 1..4.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14359
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x16 tile with ks = 3; k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14375
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks = 3, one iteration per case: k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14396
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks = 3; n runs 17..31; k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14414
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks = 3; n takes 32 and 48; k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14432
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cm_stride = 19, one iteration per case: k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14453
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x16 tile with ks = 3 and a_offset = 163; k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14470
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks = 3 and a_offset = 163; zero_index visits 0..3 for each k in {1, 10, 19, 28, 37}.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14490
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full-tile run (m = 4, n = 16, k = 8) with qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14504
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full-tile run (m = 4, n = 16, k = 8) with qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14518
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full-tile run (m = 4, n = 16, k = 8) with cm_stride set to 19.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14532 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14533
14534
14535 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single run on the full 1x4 tile with k = 8.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
14548
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single run on the full 1x4 tile with k = 8 and cn_stride = 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
14562
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, one iteration per case: n in 1..4, m in 1..1.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14580
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 and n = 4, one iteration per m in 1..1.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
14596
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 and m = 1, one iteration per n in 1..4.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
14612
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 1x4 tile; k runs 1..7.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
14627
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // One iteration per case: k in 1..7, n in 1..4, m in 1..1.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14647
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 1x4 tile; k runs 9..15.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
14662
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // One iteration per case: k in 9..15, n in 1..4, m in 1..1.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14682
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 1x4 tile; k steps through 16, 24, ..., 80.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
14697
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // One iteration per case: k in {16, 24, ..., 80}, n in 1..4, m in 1..1.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14717
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n runs 5..7; k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14734
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n runs 5..7; k takes 1, 10, 19, 28, 37; cn_stride is fixed at 7.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14752
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // One iteration per case: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..1.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14772
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n takes 8 and 12 (multiples of nr = 4); k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14789
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n takes 8 and 12; k takes 1, 10, 19, 28, 37; cn_stride is fixed at 7.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14807
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // One iteration per case: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..1.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14827
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // Full 1x4 tile with ks = 3; k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
14843
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // ks = 3, one iteration per case: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14864
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // ks = 3; n runs 5..7; k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14882
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // ks = 3; n takes 8 and 12; k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14900
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // cm_stride = 7, one iteration per case: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14921
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Full 1x4 tile with ks = 3 and a_offset = 43; k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
14938
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  // ks = 3 and a_offset = 43; zero_index visits only 0 for each k in {1, 10, 19, 28, 37}.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14958
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile run (m = 1, n = 4, k = 8) with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
14972
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile run (m = 1, n = 4, k = 8) with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
14986
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile run (m = 1, n = 4, k = 8) with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
15000 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15001
15002
15003 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Single run on the full 1x4 tile with k = 8.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
15016
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Single run on the full 1x4 tile with k = 8 and cn_stride = 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
15030
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8, one iteration per case: n in 1..4, m in 1..1.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15048
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8 and n = 4, one iteration per m in 1..1.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
15064
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8 and m = 1, one iteration per n in 1..4.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
15080
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile; k runs 1..7.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
15095
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // One iteration per case: k in 1..7, n in 1..4, m in 1..1.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15115
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile; k runs 9..15.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
15130
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // One iteration per case: k in 9..15, n in 1..4, m in 1..1.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15150
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile; k steps through 16, 24, ..., 80.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
15165
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // One iteration per case: k in {16, 24, ..., 80}, n in 1..4, m in 1..1.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15185
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n runs 5..7; k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15202
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n runs 5..7; k takes 1, 10, 19, 28, 37; cn_stride is fixed at 7.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15220
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // One iteration per case: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..1.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15240
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n takes 8 and 12 (multiples of nr = 4); k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15257
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n takes 8 and 12; k takes 1, 10, 19, 28, 37; cn_stride is fixed at 7.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15275
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // One iteration per case: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..1.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15295
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile with ks = 3; k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
15311
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // ks = 3, one iteration per case: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15332
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // ks = 3; n runs 5..7; k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15350
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // ks = 3; n takes 8 and 12; k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15368
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
  // cm_stride=7 run for every (k, n, m) with k = 1, 10, 19, 28, 37,
  // n in [1, 4] and m = 1; a single iteration per combination.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15389
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, a_offset) {
  // Full 1x4 tile with ks=3 and a_offset=43, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.ks(3).a_offset(43);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
15406
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, zero) {
  // Full 1x4 tile with ks=3 and a_offset=43; zero_index takes each value in
  // [0, 1) for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(k_size);
      tester.ks(3).a_offset(43).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15426
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
  // Full 1x4 tile at k=8 with qmin set to 128.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
15440
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
  // Full 1x4 tile at k=8 with qmax set to 128.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
15454
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
  // Full 1x4 tile at k=8 with cm_stride set to 7.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
15468 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15469
15470
15471 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8) {
  // Full 2x4 tile with k fixed at 8.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
15484
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cn) {
  // Full 2x4 tile at k=8 with cn_stride set to 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
15498
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile) {
  // k=8 for every (n, m) with n in [1, 4] and m in [1, 2]; one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15516
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  // k=8 and n=4 while m takes each value in [1, 2]; one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
15532
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  // k=8 and m=2 while n takes each value in [1, 4]; one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
15548
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8) {
  // Full 2x4 tile for every k in [1, 8).
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
15563
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_subtile) {
  // Every (k, n, m) with k in [1, 8), n in [1, 4] and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15583
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8) {
  // Full 2x4 tile for every k in [9, 16).
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
15598
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_subtile) {
  // Every (k, n, m) with k in [9, 16), n in [1, 4] and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15618
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8) {
  // Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
15633
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_subtile) {
  // Every (k, n, m) with k = 16, 24, ..., 80, n in [1, 4] and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15653
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4) {
  // m=2 with n = 5, 6, 7 (above nr=4) and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15670
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  // cn_stride=7 run with m=2, n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15688
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_subtile) {
  // Every (n, k, m) with n = 5, 6, 7, k = 1, 10, 19, 28, 37 and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15708
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4) {
  // m=2 with n = 8 and 12 (multiples of nr=4) and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15725
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
  // cn_stride=7 run with m=2, n = 8 and 12, and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15743
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_subtile) {
  // Every (n, k, m) with n = 8 and 12, k = 1, 10, 19, 28, 37 and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15763
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel) {
  // Full 2x4 tile with ks=3, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
15779
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel_subtile) {
  // ks=3 run for every (k, n, m) with k = 1, 10, 19, 28, 37, n in [1, 4]
  // and m in [1, 2]; a single iteration per combination.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15800
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  // ks=3 run with m=2, n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15818
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_small_kernel) {
  // ks=3 run with m=2, n = 8 and 12, and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15836
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm_subtile) {
  // cm_stride=7 run for every (k, n, m) with k = 1, 10, 19, 28, 37,
  // n in [1, 4] and m in [1, 2]; a single iteration per combination.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15857
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, a_offset) {
  // Full 2x4 tile with ks=3 and a_offset=83, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
15874
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, zero) {
  // Full 2x4 tile with ks=3 and a_offset=83; zero_index takes each value in
  // [0, 2) for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15894
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmin) {
  // Full 2x4 tile at k=8 with qmin set to 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
15908
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmax) {
  // Full 2x4 tile at k=8 with qmax set to 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
15922
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm) {
  // Full 2x4 tile at k=8 with cm_stride set to 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
15936 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15937
15938
15939 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8) {
  // Full 2x4 tile with k fixed at 8.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
15952
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cn) {
  // Full 2x4 tile at k=8 with cn_stride set to 7.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
15966
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile) {
  // k=8 for every (n, m) with n in [1, 4] and m in [1, 2]; one iteration each.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15984
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  // k=8 and n=4 while m takes each value in [1, 2]; one iteration each.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
16000
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  // k=8 and m=2 while n takes each value in [1, 4]; one iteration each.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
16016
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8) {
  // Full 2x4 tile for every k in [1, 8).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
16031
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_subtile) {
  // Every (k, n, m) with k in [1, 8), n in [1, 4] and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
16051
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8) {
  // Full 2x4 tile for every k in [9, 16).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
16066
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_subtile) {
  // Every (k, n, m) with k in [9, 16), n in [1, 4] and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
16086
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8) {
  // Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
16101
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_subtile) {
  // Every (k, n, m) with k = 16, 24, ..., 80, n in [1, 4] and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
16121
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4) {
  // m=2 with n = 5, 6, 7 (above nr=4) and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
16138
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  // cn_stride=7 run with m=2, n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
16156
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_subtile) {
  // Every (n, k, m) with n = 5, 6, 7, k = 1, 10, 19, 28, 37 and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
16176
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4) {
  // m=2 with n = 8 and 12 (multiples of nr=4) and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
16193
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
  // cn_stride=7 run with m=2, n = 8 and 12, and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
16211
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_subtile) {
  // Every (n, k, m) with n = 8 and 12, k = 1, 10, 19, 28, 37 and m in [1, 2];
  // a single iteration per combination.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel) {
  // Full 2x4 tile with ks=3, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
16247
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel_subtile) {
  // ks=3 run for every (k, n, m) with k = 1, 10, 19, 28, 37, n in [1, 4]
  // and m in [1, 2]; a single iteration per combination.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
16268
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  // ks=3 run with m=2, n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
16286
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_small_kernel) {
  // ks=3 run with m=2, n = 8 and 12, and k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
16304
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm_subtile) {
  // cm_stride=7 run for every (k, n, m) with k = 1, 10, 19, 28, 37,
  // n in [1, 4] and m in [1, 2]; a single iteration per combination.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
16325
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, a_offset) {
  // Full 2x4 tile with ks=3 and a_offset=83, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
16342
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, zero) {
  // Full 2x4 tile with ks=3 and a_offset=83; zero_index takes each value in
  // [0, 2) for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
16362
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmin) {
  // Full 2x4 tile at k=8 with qmin set to 128.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
16376
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmax) {
  // Full 2x4 tile at k=8 with qmax set to 128.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
16390
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm) {
  // Full 2x4 tile at k=8 with cm_stride set to 7.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
16404 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16405
16406
16407 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8) {
  // Full 4x4 tile with k fixed at 8.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
16420
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cn) {
  // Full 4x4 tile at k=8 with cn_stride set to 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
16434
// Sweeps n in [1, 4] and m in [1, 4] at k=8, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16452
// Sweeps m in [1, 4] with n=4 and k=8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
16468
// Sweeps n in [1, 4] with m=4 and k=8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
16484
// Sweeps k in [1, 7] with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
16499
// Sweeps k in [1, 7], n in [1, 4] and m in [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
16519
// Sweeps k in [9, 15] with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
16534
// Sweeps k in [9, 15], n in [1, 4] and m in [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
16554
// Sweeps k over 16, 24, ..., 80 with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
16569
// Sweeps k over 16, 24, ..., 80, n in [1, 4] and m in [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
16589
// Sweeps n in [5, 7] and k over 1, 10, ..., 37 with m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16606
// Sweeps n in [5, 7] and k over 1, 10, ..., 37 with m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16624
// Sweeps n in [5, 7], k over 1, 10, ..., 37 and m in [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
16644
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16661
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16679
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m in [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
16699
// Sweeps k over 1, 10, ..., 37 with m=4, n=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
16715
// Sweeps k over 1, 10, ..., 37, n in [1, 4] and m in [1, 4] with ks set to 3,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
16736
// Sweeps n in [5, 7] and k over 1, 10, ..., 37 with m=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16754
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16772
// Sweeps k over 1, 10, ..., 37, n in [1, 4] and m in [1, 4] with cm_stride set
// to 7, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
16793
// Sweeps k over 1, 10, ..., 37 with m=4, n=4, ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
16810
// Sweeps k over 1, 10, ..., 37 and zero_index in [0, 3] with m=4, n=4, ks set
// to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16830
// Runs an m=4, n=4, k=8 problem with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
16844
// Runs an m=4, n=4, k=8 problem with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
16858
// Runs an m=4, n=4, k=8 problem with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
16872 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16873
16874
16875 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs a single m=4, n=4, k=8 problem.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
16888
// Runs an m=4, n=4, k=8 problem with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
16902
// Sweeps n in [1, 4] and m in [1, 4] at k=8, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16920
// Sweeps m in [1, 4] with n=4 and k=8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
16936
// Sweeps n in [1, 4] with m=4 and k=8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
16952
// Sweeps k in [1, 7] with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
16967
// Sweeps k in [1, 7], n in [1, 4] and m in [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
16987
// Sweeps k in [9, 15] with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
17002
// Sweeps k in [9, 15], n in [1, 4] and m in [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
17022
// Sweeps k over 16, 24, ..., 80 with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
17037
// Sweeps k over 16, 24, ..., 80, n in [1, 4] and m in [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
17057
// Sweeps n in [5, 7] and k over 1, 10, ..., 37 with m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17074
// Sweeps n in [5, 7] and k over 1, 10, ..., 37 with m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17092
// Sweeps n in [5, 7], k over 1, 10, ..., 37 and m in [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
17112
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17129
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17147
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m in [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
17167
// Sweeps k over 1, 10, ..., 37 with m=4, n=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
17183
// Sweeps k over 1, 10, ..., 37, n in [1, 4] and m in [1, 4] with ks set to 3,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
17204
// Sweeps n in [5, 7] and k over 1, 10, ..., 37 with m=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17222
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17240
// Sweeps k over 1, 10, ..., 37, n in [1, 4] and m in [1, 4] with cm_stride set
// to 7, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
17261
// Sweeps k over 1, 10, ..., 37 with m=4, n=4, ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
17278
// Sweeps k over 1, 10, ..., 37 and zero_index in [0, 3] with m=4, n=4, ks set
// to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17298
// Runs an m=4, n=4, k=8 problem with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
17312
// Runs an m=4, n=4, k=8 problem with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
17326
// Runs an m=4, n=4, k=8 problem with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
17340 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17341
17342
17343 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs a single m=1, n=4, k=8 problem.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
17356
// Runs an m=1, n=4, k=8 problem with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
17370
// Sweeps n in [1, 4] and m in [1, 1] at k=8, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17388
// Sweeps m in [1, 1] with n=4 and k=8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
17404
// Sweeps n in [1, 4] with m=1 and k=8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
17420
// Sweeps k in [1, 7] with m=1 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
17435
// Sweeps k in [1, 7], n in [1, 4] and m in [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
17455
// Sweeps k in [9, 15] with m=1 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
17470
// Sweeps k in [9, 15], n in [1, 4] and m in [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
17490
// Runs the full m=1, n=4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17505
// For k = 16, 24, ..., 80, runs every (n, m) with n in 1..4 and m = 1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17525
// Runs n in 5..7 (above nr = 4) with m = 1, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17542
// Same sweep as n_gt_4 (n in 5..7, k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17560
// For n in 5..7 and k = 1, 10, ..., 37, runs each m in 1..1
// with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17580
// Runs n = 8 and n = 12 (multiples of nr = 4) with m = 1, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17597
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17615
// For n = 8, 12 and k = 1, 10, ..., 37, runs each m in 1..1
// with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17635
// Runs the full m=1, n=4 tile with ks set to 3, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17651
// With ks set to 3: for k = 1, 10, ..., 37, runs every (n, m) with n in 1..4
// and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672
// With ks set to 3: runs n in 5..7 with m = 1, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17690
// With ks set to 3: runs n = 8 and n = 12 with m = 1, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17708
// With cm_stride set to 7: for k = 1, 10, ..., 37, runs every (n, m) with
// n in 1..4 and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17729
// Runs the full m=1, n=4 tile with ks = 3 and a_offset = 43, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17746
// With ks = 3 and a_offset = 43: for k = 1, 10, ..., 37, runs the full tile
// once per zero_index value in [0, 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(kc);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17766
// Runs the full m=1, n=4, k=8 problem with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17780
// Runs the full m=1, n=4, k=8 problem with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17794
// Runs the full m=1, n=4, k=8 problem with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17808 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17809
17810
17811 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 1x4c2 XOP LD64 kernel on the full m=1, n=4 tile with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17824
// Runs the full m=1, n=4, k=8 problem with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17838
// With k = 8, runs every (n, m) with n in 1..4 and m = 1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17856
// With n = 4 and k = 8, runs each m in 1..1 with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17872
// With m = 1 and k = 8, runs each n in 1..4 with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17888
// Runs the full m=1, n=4 tile for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17903
// For each k in 1..7, runs every (n, m) with n in 1..4 and m = 1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17923
// Runs the full m=1, n=4 tile for each k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17938
// For each k in 9..15, runs every (n, m) with n in 1..4 and m = 1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17958
// Runs the full m=1, n=4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17973
// For k = 16, 24, ..., 80, runs every (n, m) with n in 1..4 and m = 1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17993
// Runs n in 5..7 (above nr = 4) with m = 1, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18010
// Same sweep as n_gt_4 (n in 5..7, k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18028
// For n in 5..7 and k = 1, 10, ..., 37, runs each m in 1..1
// with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18048
// Runs n = 8 and n = 12 (multiples of nr = 4) with m = 1, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18065
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18083
// For n = 8, 12 and k = 1, 10, ..., 37, runs each m in 1..1
// with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18103
// Runs the full m=1, n=4 tile with ks set to 3, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18119
// With ks set to 3: for k = 1, 10, ..., 37, runs every (n, m) with n in 1..4
// and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18140
// With ks set to 3: runs n in 5..7 with m = 1, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18158
// With ks set to 3: runs n = 8 and n = 12 with m = 1, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18176
// With cm_stride set to 7: for k = 1, 10, ..., 37, runs every (n, m) with
// n in 1..4 and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18197
// Runs the full m=1, n=4 tile with ks = 3 and a_offset = 43, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18214
// With ks = 3 and a_offset = 43: for k = 1, 10, ..., 37, runs the full tile
// once per zero_index value in [0, 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(kc);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18234
// Runs the full m=1, n=4, k=8 problem with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18248
// Runs the full m=1, n=4, k=8 problem with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18262
// Runs the full m=1, n=4, k=8 problem with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18276 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18277
18278
18279 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c2 SSE2 LD128 kernel on the full m=3, n=4 tile with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
18292
// Runs the full m=3, n=4, k=8 problem with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
18306
// With k = 8, runs every (n, m) with n in 1..4 and m in 1..3,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18324
// With n = 4 and k = 8, runs each m in 1..3 with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18340
// With m = 3 and k = 8, runs each n in 1..4 with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18356
// Runs the full m=3, n=4 tile for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18371
// For each k in 1..7, runs every (n, m) with n in 1..4 and m in 1..3,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18391
// Runs the full m=3, n=4 tile for each k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18406
// For each k in 9..15, runs every (n, m) with n in 1..4 and m in 1..3,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18426
// Runs the full m=3, n=4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18441
// For k = 16, 24, ..., 80, runs every (n, m) with n in 1..4 and m in 1..3,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18461
// Runs n in 5..7 (above nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18478
// Same sweep as n_gt_4 (n in 5..7, k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18496
// For n in 5..7 and k = 1, 10, ..., 37, runs each m in 1..3
// with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18516
// Runs n = 8 and n = 12 (multiples of nr = 4) with m = 3, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18533
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18551
// Sweeps n = 8, 12, k = 1, 10, ..., 37 and m = 1..3, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
18571
// Sweeps k = 1, 10, ..., 37 on the full tile (m = 3, n = 4) with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
18587
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..3 with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
18608
// Sweeps n = 5..7 and k = 1, 10, ..., 37 at m = 3 with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
18626
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 at m = 3 with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
18644
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..3 with cm_stride set to 7 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
18665
// Sweeps k = 1, 10, ..., 37 on the full tile with ks set to 3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
18682
// Sweeps k = 1, 10, ..., 37 and zero_index = 0..2 on the full tile with ks = 3, a_offset = 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_dim).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
18702
// Full tile (m = 3, n = 4) at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
18716
// Full tile (m = 3, n = 4) at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
18730
// Full tile (m = 3, n = 4) at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
18744 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18745
18746
18747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full tile (m = 3, n = 4) at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
18760
// Full tile (m = 3, n = 4) at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
18774
// Sweeps n = 1..4 and m = 1..3 at k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
18792
// Sweeps m = 1..3 at n = 4, k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
18808
// Sweeps n = 1..4 at m = 3, k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_dim).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
18824
// Sweeps k = 1..7 on the full tile (m = 3, n = 4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
18839
// Sweeps k = 1..7, n = 1..4 and m = 1..3, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
18859
// Sweeps k = 9..15 on the full tile (m = 3, n = 4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
18874
// Sweeps k = 9..15, n = 1..4 and m = 1..3, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
18894
// Sweeps k = 16, 24, ..., 80 (multiples of 8) on the full tile (m = 3, n = 4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
18909
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..3, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
18929
// Sweeps n = 5..7 and k = 1, 10, ..., 37 at m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
18946
// Sweeps n = 5..7 and k = 1, 10, ..., 37 at m = 3 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
18964
// Sweeps n = 5..7, k = 1, 10, ..., 37 and m = 1..3, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
18984
// Sweeps n = 8, 12 (multiples of nr = 4) and k = 1, 10, ..., 37 at m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19001
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 at m = 3 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19019
// Sweeps n = 8, 12, k = 1, 10, ..., 37 and m = 1..3, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19039
// Sweeps k = 1, 10, ..., 37 on the full tile (m = 3, n = 4) with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
19055
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..3 with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19076
// Sweeps n = 5..7 and k = 1, 10, ..., 37 at m = 3 with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19094
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 at m = 3 with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19112
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..3 with cm_stride set to 7 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19133
// Sweeps k = 1, 10, ..., 37 on the full tile with ks set to 3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
19150
// Sweeps k = 1, 10, ..., 37 and zero_index = 0..2 on the full tile with ks = 3, a_offset = 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_dim).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19170
// Full tile (m = 3, n = 4) at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
19184
// Full tile (m = 3, n = 4) at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
19198
// Full tile (m = 3, n = 4) at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
19212 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19213
19214
19215 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full tile (m = 4, n = 4) at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
19228
// Full tile (m = 4, n = 4) at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
19242
// Sweeps n = 1..4 and m = 1..4 at k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19260
// Sweeps m = 1..4 at n = 4, k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
19276
// Sweeps n = 1..4 at m = 4, k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_dim).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
19292
// Sweeps k = 1..7 on the full tile (m = 4, n = 4).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
19307
// Sweeps k = 1..7, n = 1..4 and m = 1..4, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19327
// Sweeps k = 9..15 on the full tile (m = 4, n = 4).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
19342
// Sweeps k = 9..15, n = 1..4 and m = 1..4, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19362
// Sweeps k = 16, 24, ..., 80 (multiples of 8) on the full tile (m = 4, n = 4).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
19377
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..4, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19397
// Sweeps n = 5..7 and k = 1, 10, ..., 37 at m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19414
// Sweeps n = 5..7 and k = 1, 10, ..., 37 at m = 4 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19432
// Sweeps n = 5..7, k = 1, 10, ..., 37 and m = 1..4, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19452
// Sweeps n = 8, 12 (multiples of nr = 4) and k = 1, 10, ..., 37 at m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19469
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 at m = 4 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19487
// Sweeps n = 8, 12, k = 1, 10, ..., 37 and m = 1..4, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19507
// Sweeps k = 1, 10, ..., 37 on the full tile (m = 4, n = 4) with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
19523
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..4 with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19544
// Sweeps n = 5..7 and k = 1, 10, ..., 37 at m = 4 with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19562
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 at m = 4 with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
19580
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..4 with cm_stride set to 7 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
19601
// 4x4c2 SSE4.1 LD128 kernel: full 4x4 tile, k = 1, 10, 19, 28, 37, with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(cur_k).ks(3);
    tester.a_offset(163);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19618
// 4x4c2 SSE4.1 LD128 kernel: full 4x4 tile, k = 1, 10, 19, 28, 37, with ks(3), a_offset(163)
// and zero_index taking each value 0..3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 4; ++cur_mz) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(cur_k).ks(3);
      tester.a_offset(163).zero_index(cur_mz);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19638
// 4x4c2 SSE4.1 LD128 kernel: full 4x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19652
// 4x4c2 SSE4.1 LD128 kernel: full 4x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19666
// 4x4c2 SSE4.1 LD128 kernel: full 4x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19680 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19681
19682
19683 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 AVX LD128 kernel: full 1x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19696
// 1x4c2 AVX LD128 kernel: full 1x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19710
// 1x4c2 AVX LD128 kernel: every n in 1..4 with m = 1 at k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(cur_m).n(cur_n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19728
// 1x4c2 AVX LD128 kernel: m = 1 (the only value in range) with n = 4 at k = 8, one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(cur_m).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19744
// 1x4c2 AVX LD128 kernel: every n in 1..4 with m = 1 at k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(cur_n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19760
// 1x4c2 AVX LD128 kernel: full 1x4 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(cur_k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19775
// 1x4c2 AVX LD128 kernel: every k in 1..7, n in 1..4 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19795
// 1x4c2 AVX LD128 kernel: full 1x4 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(cur_k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19810
// 1x4c2 AVX LD128 kernel: every k in 9..15, n in 1..4 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19830
// 1x4c2 AVX LD128 kernel: full 1x4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(cur_k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19845
// 1x4c2 AVX LD128 kernel: k = 16, 24, ..., 80 with n in 1..4 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19865
// 1x4c2 AVX LD128 kernel: n = 5..7 with m = 1 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19882
// 1x4c2 AVX LD128 kernel: n = 5..7 with m = 1 for k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cur_n).k(cur_k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19900
// 1x4c2 AVX LD128 kernel: n = 5..7, k = 1, 10, 19, 28, 37 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19920
// 1x4c2 AVX LD128 kernel: n = 8 and 12 with m = 1 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19937
// 1x4c2 AVX LD128 kernel: n = 8 and 12 with m = 1 for k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cur_n).k(cur_k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19955
// 1x4c2 AVX LD128 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19975
// 1x4c2 AVX LD128 kernel: full 1x4 tile for k = 1, 10, 19, 28, 37 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(cur_k).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19991
// 1x4c2 AVX LD128 kernel: k = 1, 10, 19, 28, 37, n in 1..4 and m = 1 with ks(3),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20012
// 1x4c2 AVX LD128 kernel: n = 5..7 with m = 1 for k = 1, 10, 19, 28, 37 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cur_n).k(cur_k).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20030
// 1x4c2 AVX LD128 kernel: n = 8 and 12 with m = 1 for k = 1, 10, 19, 28, 37 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cur_n).k(cur_k).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20048
// 1x4c2 AVX LD128 kernel: k = 1, 10, 19, 28, 37, n in 1..4 and m = 1 with cm_stride(7),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20069
// 1x4c2 AVX LD128 kernel: full 1x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(cur_k).ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20086
// 1x4c2 AVX LD128 kernel: full 1x4 tile for k = 1, 10, 19, 28, 37 with ks(3), a_offset(43)
// and zero_index = 0 (the only value in range).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(cur_k).ks(3);
      tester.a_offset(43).zero_index(cur_mz);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20106
// 1x4c2 AVX LD128 kernel: full 1x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20120
// 1x4c2 AVX LD128 kernel: full 1x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20134
// 1x4c2 AVX LD128 kernel: full 1x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20148 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20149
20150
20151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 AVX LD128 kernel: full 2x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20164
// 2x4c2 AVX LD128 kernel: full 2x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20178
// 2x4c2 AVX LD128 kernel: every n in 1..4 and m in 1..2 at k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(cur_m).n(cur_n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20196
// 2x4c2 AVX LD128 kernel: every m in 1..2 with n = 4 at k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(cur_m).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20212
// 2x4c2 AVX LD128 kernel: every n in 1..4 with m = 2 at k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(cur_n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20228
// 2x4c2 AVX LD128 kernel: full 2x4 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(cur_k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20243
// 2x4c2 AVX LD128 kernel: every k in 1..7, n in 1..4 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20263
// 2x4c2 AVX LD128 kernel: full 2x4 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(cur_k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20278
// 2x4c2 AVX LD128 kernel: every k in 9..15, n in 1..4 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20298
// 2x4c2 AVX LD128 kernel: full 2x4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(cur_k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20313
// 2x4c2 AVX LD128 kernel: k = 16, 24, ..., 80 with n in 1..4 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20333
// 2x4c2 AVX LD128 kernel: n = 5..7 with m = 2 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20350
// 2x4c2 AVX LD128 kernel: n = 5..7 with m = 2 for k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20368
// 2x4c2 AVX LD128 kernel: n = 5..7, k = 1, 10, 19, 28, 37 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20388
// 2x4c2 AVX LD128 kernel: n = 8 and 12 with m = 2 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20405
// 2x4c2 AVX LD128 kernel: n = 8 and 12 with m = 2 for k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20423
// 2x4c2 AVX LD128 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20443
// 2x4c2 AVX LD128 kernel: full 2x4 tile for k = 1, 10, 19, 28, 37 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(cur_k).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20459
// 2x4c2 AVX LD128 kernel: k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..2 with ks(3),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20480
// 2x4c2 AVX LD128 kernel: n = 5..7 with m = 2 for k = 1, 10, 19, 28, 37 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20498
// 2x4c2 AVX LD128 kernel: n = 8 and 12 with m = 2 for k = 1, 10, 19, 28, 37 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20516
// 2x4c2 AVX LD128 kernel: k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..2 with cm_stride(7),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20537
// 2x4c2 AVX LD128 kernel: full 2x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(cur_k).ks(3);
    tester.a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20554
// 2x4c2 AVX LD128 kernel: full 2x4 tile for k = 1, 10, 19, 28, 37 with ks(3), a_offset(83)
// and zero_index taking each value 0..1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 2; ++cur_mz) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(cur_k).ks(3);
      tester.a_offset(83).zero_index(cur_mz);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20574
// 2x4c2 AVX LD128 kernel: full 2x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20588
// 2x4c2 AVX LD128 kernel: full 2x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20602
// 2x4c2 AVX LD128 kernel: full 2x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20616 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20617
20618
20619 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 XOP LD128 kernel: full 3x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20632
// 3x4c2 XOP LD128 kernel: full 3x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20646
// 3x4c2 XOP LD128 kernel: every n in 1..4 and m in 1..3 at k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(cur_m).n(cur_n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20664
// 3x4c2 XOP ld128 kernel: m swept over 1..3 with n=4, k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20680
// 3x4c2 XOP ld128 kernel: n swept over 1..4 with m=3, k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(nc).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20696
// 3x4c2 XOP ld128 kernel: k swept over 1..7 with m=3, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20711
// 3x4c2 XOP ld128 kernel: k in 1..7, n in 1..4, m in 1..3, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20731
// 3x4c2 XOP ld128 kernel: k swept over 9..15 with m=3, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20746
// 3x4c2 XOP ld128 kernel: k in 9..15, n in 1..4, m in 1..3, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20766
// 3x4c2 XOP ld128 kernel: k = 16, 24, ..., 80 with m=3, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20781
// 3x4c2 XOP ld128 kernel: k = 16, 24, ..., 80; n in 1..4; m in 1..3; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20801
// 3x4c2 XOP ld128 kernel: n in 5..7 and k = 1, 10, ..., 37 with m=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20818
// 3x4c2 XOP ld128 kernel: n in 5..7 and k = 1, 10, ..., 37 with m=3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20836
// 3x4c2 XOP ld128 kernel: n in 5..7; k = 1, 10, ..., 37; m in 1..3; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20856
// 3x4c2 XOP ld128 kernel: n = 8 and 12; k = 1, 10, ..., 37; m=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20873
// 3x4c2 XOP ld128 kernel: n = 8 and 12; k = 1, 10, ..., 37; m=3; cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20891
// 3x4c2 XOP ld128 kernel: n = 8 and 12; k = 1, 10, ..., 37; m in 1..3; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20911
// 3x4c2 XOP ld128 kernel: k = 1, 10, ..., 37 with m=3, n=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20927
// 3x4c2 XOP ld128 kernel: k = 1, 10, ..., 37; n in 1..4; m in 1..3; ks=3; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20948
// 3x4c2 XOP ld128 kernel: n in 5..7; k = 1, 10, ..., 37; m=3; ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20966
// 3x4c2 XOP ld128 kernel: n = 8 and 12; k = 1, 10, ..., 37; m=3; ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20984
// 3x4c2 XOP ld128 kernel: k = 1, 10, ..., 37; n in 1..4; m in 1..3; cm_stride=7; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21005
// 3x4c2 XOP ld128 kernel: k = 1, 10, ..., 37 with m=3, n=4, ks=3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21022
// 3x4c2 XOP ld128 kernel: k = 1, 10, ..., 37 and zero_index in 0..2, with m=3, n=4, ks=3, a_offset=127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(kc);
      tester.ks(3).a_offset(127).zero_index(zi);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21042
// 3x4c2 XOP ld128 kernel: m=3, n=4, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21056
// 3x4c2 XOP ld128 kernel: m=3, n=4, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21070
// 3x4c2 XOP ld128 kernel: m=3, n=4, k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21084 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21085
21086
21087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 AVX ld128 kernel: m=4, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21100
// 4x4c2 AVX ld128 kernel: m=4, n=4, k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21114
// 4x4c2 AVX ld128 kernel: every (n, m) with n in 1..4 and m in 1..4 at k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21132
// 4x4c2 AVX ld128 kernel: m swept over 1..4 with n=4, k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21148
// 4x4c2 AVX ld128 kernel: n swept over 1..4 with m=4, k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21164
// 4x4c2 AVX ld128 kernel: k swept over 1..7 with m=4, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21179
// 4x4c2 AVX ld128 kernel: k in 1..7, n in 1..4, m in 1..4, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21199
// 4x4c2 AVX ld128 kernel: k swept over 9..15 with m=4, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21214
// 4x4c2 AVX ld128 kernel: k in 9..15, n in 1..4, m in 1..4, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21234
// 4x4c2 AVX ld128 kernel: k = 16, 24, ..., 80 with m=4, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21249
// 4x4c2 AVX ld128 kernel: k = 16, 24, ..., 80; n in 1..4; m in 1..4; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21269
// 4x4c2 AVX ld128 kernel: n in 5..7 and k = 1, 10, ..., 37 with m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21286
// 4x4c2 AVX ld128 kernel: n in 5..7 and k = 1, 10, ..., 37 with m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21304
// 4x4c2 AVX ld128 kernel: n in 5..7; k = 1, 10, ..., 37; m in 1..4; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21324
// 4x4c2 AVX ld128 kernel: n = 8 and 12; k = 1, 10, ..., 37; m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21341
// 4x4c2 AVX ld128 kernel: n = 8 and 12; k = 1, 10, ..., 37; m=4; cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21359
// 4x4c2 AVX ld128 kernel: n = 8 and 12; k = 1, 10, ..., 37; m in 1..4; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21379
// 4x4c2 AVX ld128 kernel: k = 1, 10, ..., 37 with m=4, n=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21395
// 4x4c2 AVX ld128 kernel: k = 1, 10, ..., 37; n in 1..4; m in 1..4; ks=3; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21416
// 4x4c2 AVX ld128 kernel: n in 5..7; k = 1, 10, ..., 37; m=4; ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21434
// 4x4c2 AVX ld128 kernel: n = 8 and 12; k = 1, 10, ..., 37; m=4; ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21452
// 4x4c2 AVX ld128 kernel: k = 1, 10, ..., 37; n in 1..4; m in 1..4; cm_stride=7; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21473
// 4x4c2 AVX ld128 kernel: k = 1, 10, ..., 37 with m=4, n=4, ks=3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21490
// 4x4c2 AVX ld128 kernel: k = 1, 10, ..., 37 and zero_index in 0..3, with m=4, n=4, ks=3, a_offset=163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(kc);
      tester.ks(3).a_offset(163).zero_index(zi);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21510
// 4x4c2 AVX ld128 kernel: m=4, n=4, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21524
// 4x4c2 AVX ld128 kernel: m=4, n=4, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21538
// 4x4c2 AVX ld128 kernel: m=4, n=4, k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21552 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21553
21554
21555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2s4 SSE4.1 ld64 kernel: m=1, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21568
// 1x4c2s4 SSE4.1 ld64 kernel: m=1, n=4, k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21582
// 1x4c2s4 SSE4.1 ld64 kernel: n in 1..4 and m in 1..1 at k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21600
// 1x4c2s4 SSE4.1 ld64 kernel: m swept over 1..1 with n=4, k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21616
// 1x4c2s4 SSE4.1 ld64 kernel: n swept over 1..4 with m=1, k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21632
// 1x4c2s4 SSE4.1 ld64 kernel: k swept over 1..7 with m=1, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21647
// 1x4c2s4 SSE4.1 ld64 kernel: k in 1..7, n in 1..4, m in 1..1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21667
// 1x4c2s4 SSE4.1 ld64 kernel: k swept over 9..15 with m=1, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21682
// 1x4c2s4 SSE4.1 ld64 kernel: k in 9..15, n in 1..4, m in 1..1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21702
// 1x4c2s4 SSE4.1 ld64 kernel: k = 16, 24, ..., 80 with m=1, n=4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21717
// 1x4c2s4 SSE4.1 ld64 kernel: k = 16, 24, ..., 80; n in 1..4; m in 1..1; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21737
// n in 5..7 (above nr=4) combined with k in {1, 10, 19, 28, 37}; m=1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21754
// n in 5..7 combined with k in {1, 10, 19, 28, 37}; m=1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21772
// n in 5..7, k in {1, 10, 19, 28, 37}, and an m loop that only covers m=1;
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21792
// n in {8, 12} (multiples of nr=4) combined with k in {1, 10, 19, 28, 37}; m=1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21809
// n in {8, 12} combined with k in {1, 10, 19, 28, 37}; m=1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21827
// n in {8, 12}, k in {1, 10, 19, 28, 37}, and an m loop that only covers m=1;
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21847
// Full 1x4 tile with ks=3, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21863
// ks=3 for k in {1, 10, 19, 28, 37}, every n in 1..4 (the m loop only covers
// m=1); iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21884
// n in 5..7 combined with k in {1, 10, 19, 28, 37}; m=1 and ks=3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21902
// n in {8, 12} combined with k in {1, 10, 19, 28, 37}; m=1 and ks=3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21920
// cm_stride set to 7 for k in {1, 10, 19, 28, 37}, every n in 1..4 (the m loop
// only covers m=1); iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21941
// Full 1x4 tile with ks=3 and a_offset=43, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21958
// Full 1x4 tile with ks=3 and a_offset=43, for k in {1, 10, 19, 28, 37};
// zero_index takes each value below 1 (i.e. only 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21978
// Full 1x4 tile at k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21992
// Full 1x4 tile at k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22006
// Full 1x4 tile at k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22020 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22021
22022
22023 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile (m=3, n=4) at k=8; all other tester settings left at defaults.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22036
// Full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22050
// k=8 for every (n, m) pair with n in 1..4 and m in 1..3; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22068
// k=8 and n=4 with m swept over 1..3; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(2).sr(4)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22084
// k=8 and m=3 with n swept over 1..4; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22100
// Sweeps k over 1..7 with the full 3x4 tile (m=3, n=4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22115
// For each k in 1..7, runs every n in 1..4 and m in 1..3; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22135
// Sweeps k over 9..15 with the full 3x4 tile (m=3, n=4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22150
// For each k in 9..15, runs every n in 1..4 and m in 1..3; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22170
// Sweeps k over the multiples of 8 from 16 to 80 with the full 3x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22185
// For each k in {16, 24, ..., 80}, runs every n in 1..4 and m in 1..3;
// iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22205
// n in 5..7 (above nr=4) combined with k in {1, 10, 19, 28, 37}; m=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22222
// n in 5..7 combined with k in {1, 10, 19, 28, 37}; m=3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22240
// n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..3; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22260
// n in {8, 12} (multiples of nr=4) combined with k in {1, 10, 19, 28, 37}; m=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22277
// n in {8, 12} combined with k in {1, 10, 19, 28, 37}; m=3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22295
// n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..3; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22315
// Full 3x4 tile with ks=3, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22331
// ks=3 for k in {1, 10, 19, 28, 37}, every n in 1..4 and m in 1..3;
// iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22352
// n in 5..7 combined with k in {1, 10, 19, 28, 37}; m=3 and ks=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22370
// n in {8, 12} combined with k in {1, 10, 19, 28, 37}; m=3 and ks=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22388
// cm_stride set to 7 for k in {1, 10, 19, 28, 37}, every n in 1..4 and m in
// 1..3; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22409
// Full 3x4 tile with ks=3 and a_offset=127, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_val)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22426
// Full 3x4 tile with ks=3 and a_offset=127, for k in {1, 10, 19, 28, 37};
// zero_index takes each value in 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22446
// Full 3x4 tile at k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22460
// Full 3x4 tile at k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22474
// Full 3x4 tile at k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22488 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22489
22490
22491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile (m=4, n=4) at k=8; all other tester settings left at defaults.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22504
// Full 4x4 tile at k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22518
// k=8 for every (n, m) pair with n in 1..4 and m in 1..4; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22536
// k=8 and n=4 with m swept over 1..4; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22552
// k=8 and m=4 with n swept over 1..4; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22568
// Sweeps k over 1..7 with the full 4x4 tile (m=4, n=4).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22583
// For each k in 1..7, runs every n in 1..4 and m in 1..4; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22603
// Sweeps k over 9..15 with the full 4x4 tile (m=4, n=4).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22618
// For each k in 9..15, runs every n in 1..4 and m in 1..4; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22638
// Sweeps k over the multiples of 8 from 16 to 80 with the full 4x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
22653
// For each k in {16, 24, ..., 80}, runs every n in 1..4 and m in 1..4;
// iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22673
// n in 5..7 (above nr=4) combined with k in {1, 10, 19, 28, 37}; m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22690
// n in 5..7 combined with k in {1, 10, 19, 28, 37}; m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22708
// n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..4; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22728
// n in {8, 12} (multiples of nr=4) combined with k in {1, 10, 19, 28, 37}; m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22745
// n in {8, 12} combined with k in {1, 10, 19, 28, 37}; m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22763
// n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..4; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22783
// Sweeps k = 1, 10, ..., 37 on a 4x4 problem with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
22799
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..4 with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22820
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22838
// Sweeps n over {8, 12} and k = 1, 10, ..., 37 with m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22856
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..4 with cm_stride set to 7 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22877
// Sweeps k = 1, 10, ..., 37 on a 4x4 problem with ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
22894
// Sweeps k = 1, 10, ..., 37 and zero_index = 0..3 on a 4x4 problem with ks = 3 and a_offset = 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22914
// Single 4x4 problem with k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22928
// Single 4x4 problem with k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22942
// Single 4x4 problem with k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22956 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22957
22958
22959 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 4x4 problem with k = 8; no other tester options are set.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22972
// Single 4x4 problem with k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22986
// Sweeps n = 1..4 and m = 1..4 at k = 8, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23004
// Sweeps m = 1..4 with n = 4 and k = 8, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23020
// Sweeps n = 1..4 with m = 4 and k = 8, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23036
// Sweeps k = 1..7 on a 4x4 problem.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23051
// Sweeps k = 1..7, n = 1..4 and m = 1..4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071
// Sweeps k = 9..15 on a 4x4 problem.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23086
// Sweeps k = 9..15, n = 1..4 and m = 1..4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23106
// Sweeps k = 16, 24, ..., 80 on a 4x4 problem.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23121
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23141
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23158
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23176
// Sweeps n = 5..7, k = 1, 10, ..., 37 and m = 1..4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23196
// Sweeps n over {8, 12} and k = 1, 10, ..., 37 with m = 4 (equal to mr).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23213
// Sweeps n over {8, 12} and k = 1, 10, ..., 37 with m = 4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23231
// Sweeps n over {8, 12}, k = 1, 10, ..., 37 and m = 1..4, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23251
// Sweeps k = 1, 10, ..., 37 on a 4x4 problem with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23267
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..4 with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23288
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23306
// Sweeps n over {8, 12} and k = 1, 10, ..., 37 with m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23324
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1..4 with cm_stride set to 7 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23345
// Sweeps k = 1, 10, ..., 37 on a 4x4 problem with ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23362
// Sweeps k = 1, 10, ..., 37 and zero_index = 0..3 on a 4x4 problem with ks = 3 and a_offset = 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23382
// Single 4x4 problem with k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23396
// Single 4x4 problem with k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23410
// Single 4x4 problem with k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23424 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23425
23426
23427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 1x4 problem with k = 8; no other tester options are set.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23440
// Single 1x4 problem with k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23454
// Sweeps n = 1..4 at k = 8 with iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23472
// Runs n = 4, k = 8 with iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23488
// Sweeps n = 1..4 with m = 1 and k = 8, with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23504
// Sweeps k = 1..7 on a 1x4 problem.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23519
// Sweeps k = 1..7 and n = 1..4 with iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23539
// Sweeps k = 9..15 on a 1x4 problem.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23554
// Sweeps k = 9..15 and n = 1..4 with iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23574
// Sweeps k = 16, 24, ..., 80 on a 1x4 problem.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23589
// Sweeps k = 16, 24, ..., 80 and n = 1..4 with iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23609
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23626
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23644
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23664
// Sweeps n over {8, 12} and k = 1, 10, ..., 37 with m = 1 (equal to mr).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23681
// Sweeps n over {8, 12} and k = 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23699
// Sweeps n over {8, 12} and k = 1, 10, ..., 37 with iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23719
// Sweeps k = 1, 10, ..., 37 on a 1x4 problem with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23735
// Sweeps k = 1, 10, ..., 37 and n = 1..4 with ks = 3 and iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23756
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23774
// Sweeps n over {8, 12} and k = 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23792
// Sweeps k = 1, 10, ..., 37 and n = 1..4 with cm_stride = 7 and iterations(1); the m loop covers only m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23813
// Sweeps k = 1, 10, ..., 37 on a 1x4 problem with ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23830
// zero: same setup as the a_offset test (ks(3), a_offset(43)), additionally
// passing each zero_index below mr = 1 (i.e. only index 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(1).n(4).k(k_val)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23850
// qmin: full 1x4 tile with k = 8 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23864
// qmax: full 1x4 tile with k = 8 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23878
// strided_cm: full 1x4 tile with k = 8 and cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23892 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23893
23894
23895 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: one full 1x4 tile (m = 1, n = 4) with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23908
// strided_cn: full 1x4 tile with k = 8 and cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23922
// k_eq_8_subtile: every (m, n) up to the 1x4 tile with k = 8 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23940
// k_eq_8_subtile_m: m swept over 1..mr (just 1 here) with n = 4, k = 8 and
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23956
// k_eq_8_subtile_n: n swept over 1..4 with m = 1, k = 8 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23972
// k_lt_8: full 1x4 tile for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
        .m(1).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23987
// k_lt_8_subtile: every (m, n) up to the 1x4 tile for each k in 1..7,
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24007
// k_gt_8: full 1x4 tile for each k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
        .m(1).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24022
// k_gt_8_subtile: every (m, n) up to the 1x4 tile for each k in 9..15,
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24042
// k_div_8: full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
        .m(1).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24057
// k_div_8_subtile: every (m, n) up to the 1x4 tile for k = 16, 24, ..., 80,
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24077
// n_gt_4: n = 5..7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24094
// n_gt_4_strided_cn: n = 5..7 crossed with k = 1, 10, 19, 28, 37,
// each with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24112
// n_gt_4_subtile: n = 5..7, k = 1, 10, 19, 28, 37 and m swept over 1..mr
// (just 1 here), with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24132
// n_div_4: n = 8 and 12 (multiples of nr = 4) crossed with
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24149
// n_div_4_strided_cn: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37,
// each with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24167
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and m swept over 1..mr
// (just 1 here), with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24187
// small_kernel: full 1x4 tile for k = 1, 10, 19, 28, 37 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
        .m(1).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24203
// small_kernel_subtile: every (m, n) up to the 1x4 tile for
// k = 1, 10, 19, 28, 37, with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24224
// n_gt_4_small_kernel: n = 5..7 (above nr = 4) crossed with
// k = 1, 10, 19, 28, 37; every configuration also sets ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24242
// n_div_4_small_kernel: n = 8 and 12 (multiples of nr = 4) crossed with
// k = 1, 10, 19, 28, 37; every configuration also sets ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24260
// strided_cm_subtile: every (m, n) up to the 1x4 tile for k = 1, 10, 19, 28, 37,
// with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24281
// a_offset: full 1x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24298
// zero: same setup as the a_offset test (ks(3), a_offset(43)), additionally
// passing each zero_index below mr = 1 (i.e. only index 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
          .m(1).n(4).k(k_val)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24318
// qmin: full 1x4 tile with k = 8 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24332
// qmax: full 1x4 tile with k = 8 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24346
// strided_cm: full 1x4 tile with k = 8 and cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)  // tile parameters, matching the 1x4c2s4 kernel name
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24360 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24361
24362
24363 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: one full 2x4 tile (m = 2, n = 4) with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24376
// strided_cn: full 2x4 tile with k = 8 and cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24390
// k_eq_8_subtile: every (m, n) up to the 2x4 tile with k = 8 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24408
// k_eq_8_subtile_m: m swept over 1..2 with n = 4, k = 8 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24424
// k_eq_8_subtile_n: n swept over 1..4 with m = 2, k = 8 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24440
// k_lt_8: full 2x4 tile for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
        .m(2).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24455
// k_lt_8_subtile: every (m, n) up to the 2x4 tile for each k in 1..7,
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24475
// k_gt_8: full 2x4 tile for each k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
        .m(2).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24490
// k_gt_8_subtile: every (m, n) up to the 2x4 tile for each k in 9..15,
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24510
// k_div_8: full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
        .m(2).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24525
// k_div_8_subtile: every (m, n) up to the 2x4 tile for k = 16, 24, ..., 80,
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24545
// n_gt_4: n = 5..7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24562
// n_gt_4_strided_cn: n = 5..7 crossed with k = 1, 10, 19, 28, 37,
// each with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24580
// n_gt_4_subtile: n = 5..7, k = 1, 10, 19, 28, 37 and m swept over 1..2,
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24600
// n_div_4: n = 8 and 12 (multiples of nr = 4) crossed with
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24617
// n_div_4_strided_cn: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37,
// each with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24635
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and m swept over 1..2,
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24655
// small_kernel: full 2x4 tile for k = 1, 10, 19, 28, 37 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
        .m(2).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24671
// small_kernel_subtile: every (m, n) up to the 2x4 tile for
// k = 1, 10, 19, 28, 37, with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24692
// n_gt_4_small_kernel: n = 5..7 (above nr = 4) crossed with
// k = 1, 10, 19, 28, 37; every configuration also sets ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24710
// n_div_4_small_kernel: n = 8 and 12 (multiples of nr = 4) crossed with
// k = 1, 10, 19, 28, 37; every configuration also sets ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24728
// strided_cm_subtile: every (m, n) up to the 2x4 tile for k = 1, 10, 19, 28, 37,
// with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24749
// a_offset: full 2x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24766
// zero: same setup as the a_offset test (ks(3), a_offset(83)), additionally
// passing each zero_index below mr = 2 (indices 0 and 1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(2).n(4).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24786
// qmin: full 2x4 tile with k = 8 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24800
// qmax: full 2x4 tile with k = 8 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24814
// strided_cm: full 2x4 tile with k = 8 and cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24828 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24829
24830
24831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: one full 2x4 tile (m = 2, n = 4) with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24844
// strided_cn: full 2x4 tile with k = 8 and cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24858
// k_eq_8_subtile: every (m, n) up to the 2x4 tile with k = 8 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)  // tile parameters, matching the 2x4c2s4 kernel name
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24876
// Sweeps m over [1, 2] with n = 4 and k = 8, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24892
// Sweeps n over [1, 4] with m = 2 and k = 8, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24908
// Sweeps k over [1, 7] with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24923
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24943
// Sweeps k over [9, 15] with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24958
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24978
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24993
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 4] and m over [1, 2],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25013
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25030
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) with m = 2 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25048
// Sweeps n over [5, 7], k over 1, 10, ..., 37 (step 9) and m over [1, 2],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25068
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 (step 9) with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25085
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 (step 9) with m = 2 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25103
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 (step 9) and m over [1, 2],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25123
// Sweeps k over 1, 10, ..., 37 (step 9) with m = 2, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25139
// Sweeps k over 1, 10, ..., 37 (step 9), n over [1, 4] and m over [1, 2],
// with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25160
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) with m = 2 and
// ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25178
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 (step 9) with m = 2 and
// ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25196
// Sweeps k over 1, 10, ..., 37 (step 9), n over [1, 4] and m over [1, 2],
// with cm_stride set to 7 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25217
// Sweeps k over 1, 10, ..., 37 (step 9) with m = 2, n = 4, ks set to 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25234
// Sweeps k over 1, 10, ..., 37 (step 9) and zero_index over [0, 1] with
// m = 2, n = 4, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25254
// Runs the kernel once with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25268
// Runs the kernel once with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25282
// Runs the kernel once with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25297
25298
25299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with m = 4, n = 4 and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25312
// Runs the kernel once with m = 4, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25326
// Sweeps n over [1, 4] and m over [1, 4] at k = 8, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25344
// Sweeps m over [1, 4] with n = 4 and k = 8, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25360
// Sweeps n over [1, 4] with m = 4 and k = 8, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25376
// Sweeps k over [1, 7] with m = 4 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25391
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25411
// Sweeps k over [9, 15] with m = 4 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25426
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25446
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 4 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25461
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 4] and m over [1, 4],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25481
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25498
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) with m = 4 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25516
// Sweeps n over [5, 7], k over 1, 10, ..., 37 (step 9) and m over [1, 4],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25536
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 (step 9) with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25553
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 (step 9) with m = 4 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25571
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 (step 9) and m over [1, 4],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25591
// Sweeps k over 1, 10, ..., 37 (step 9) with m = 4, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25607
// Sweeps k over 1, 10, ..., 37 (step 9), n over [1, 4] and m over [1, 4],
// with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25628
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) with m = 4 and
// ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25646
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 (step 9) with m = 4 and
// ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25664
// Sweeps k over 1, 10, ..., 37 (step 9), n over [1, 4] and m over [1, 4],
// with cm_stride set to 7 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25685
// Sweeps k over 1, 10, ..., 37 (step 9) with m = 4, n = 4, ks set to 3 and
// a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25702
// Sweeps k over 1, 10, ..., 37 (step 9) and zero_index over [0, 3] with
// m = 4, n = 4, ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25722
// Runs the kernel once with m = 4, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25736
// Runs the kernel once with m = 4, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25750
// Runs the kernel once with m = 4, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25764 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25765
25766
25767 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with m = 4, n = 4 and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25780
// Runs the kernel once with m = 4, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25794
// Sweeps n over [1, 4] and m over [1, 4] at k = 8, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25812
// Sweeps m over [1, 4] with n = 4 and k = 8, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25828
// Sweeps n over [1, 4] with m = 4 and k = 8, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25844
// Sweeps k over [1, 7] with m = 4 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25859
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25879
// Sweeps k over [9, 15] with m = 4 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25894
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25914
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 4 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25929
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 4] and m over [1, 4],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25949
// For n in 5..7, runs the kernel with m=4 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25966
// Same n (5..7) and k (1..40 step 9) sweep as n_gt_4, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25984
// For n in 5..7 and k = 1..40 step 9, sweeps m over 1..4 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26004
// For n = 8 and 12, runs the kernel with m=4 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26021
// Same n (8, 12) and k (1..40 step 9) sweep as n_div_4, with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26039
// For n = 8 and 12 and k = 1..40 step 9, sweeps m over 1..4 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26059
// Runs the kernel with m=4, n=4 and ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26075
// With ks set to 3 and iterations set to 1, sweeps k = 1..40 step 9,
// n over 1..4 and m over 1..4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26096
// For n in 5..7 and k = 1..40 step 9, runs the kernel with m=4 and ks set
// to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26114
// For n = 8 and 12 and k = 1..40 step 9, runs the kernel with m=4 and ks
// set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26132
// With cm_stride set to 7 and iterations set to 1, sweeps k = 1..40 step 9,
// n over 1..4 and m over 1..4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26153
// Runs the kernel with m=4, n=4, ks set to 3 and a_offset set to 163 for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26170
// For k = 1..40 step 9, passes each zero_index in 0..3 to the tester, with
// ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26190
// Single run with m=4, n=4, k=8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26204
// Single run with m=4, n=4, k=8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26218
// Single run with m=4, n=4, k=8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26232 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26233
26234
26235 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 1x4c2s4 SSE4.1 ld128 kernel with m=1, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26248
// Single run with m=1, n=4, k=8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26262
// With k=8 and iterations set to 1, sweeps n over 1..4 and m over 1..1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26280
// With n=4, k=8 and iterations set to 1, sweeps m over 1..1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26296
// With m=1, k=8 and iterations set to 1, sweeps n over 1..4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26312
// Runs the kernel with m=1, n=4 for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26327
// For every k in [1, 7], sweeps n over 1..4 and m over 1..1 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26347
// Runs the kernel with m=1, n=4 for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26362
// For every k in [9, 15], sweeps n over 1..4 and m over 1..1 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26382
// Runs the kernel with m=1, n=4 for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26397
// For k = 16, 24, ..., 80, sweeps n over 1..4 and m over 1..1 with
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26417
// For n in 5..7, runs the kernel with m=1 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26434
// Same n (5..7) and k (1..40 step 9) sweep as n_gt_4, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26452
// For n in 5..7 and k = 1..40 step 9, sweeps m over 1..1 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26472
// For n = 8 and 12, runs the kernel with m=1 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26489
// Same n (8, 12) and k (1..40 step 9) sweep as n_div_4, with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26507
// For n = 8 and 12 and k = 1..40 step 9, sweeps m over 1..1 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26527
// Runs the kernel with m=1, n=4 and ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26543
// With ks set to 3 and iterations set to 1, sweeps k = 1..40 step 9,
// n over 1..4 and m over 1..1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26564
// For n in 5..7 and k = 1..40 step 9, runs the kernel with m=1 and ks set
// to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26582
// For n = 8 and 12 and k = 1..40 step 9, runs the kernel with m=1 and ks
// set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26600
// With cm_stride set to 7 and iterations set to 1, sweeps k = 1..40 step 9,
// n over 1..4 and m over 1..1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26621
// Runs the kernel with m=1, n=4, ks set to 3 and a_offset set to 43 for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26638
// For k = 1..40 step 9, passes zero_index 0 (the only value below 1) to the
// tester, with ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26658
// Single run with m=1, n=4, k=8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26672
// Single run with m=1, n=4, k=8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26686
// Single run with m=1, n=4, k=8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26700 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26701
26702
26703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 2x4c2s4 SSE2 ld128 kernel with m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
26716
// Single run with m=2, n=4, k=8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
26730
// With k=8 and iterations set to 1, sweeps n over 1..4 and m over 1..2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26748
// With n=4, k=8 and iterations set to 1, sweeps m over 1..2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
26764
// With m=2, k=8 and iterations set to 1, sweeps n over 1..4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
26780
// Runs the kernel with m=2, n=4 for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
26795
// For every k in [1, 7], sweeps n over 1..4 and m over 1..2 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26815
// Runs the kernel with m=2, n=4 for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
26830
// For every k in [9, 15], sweeps n over 1..4 and m over 1..2 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26850
// Runs the kernel with m=2, n=4 for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
26865
// For k = 16, 24, ..., 80, sweeps n over 1..4 and m over 1..2 with
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26885
// For n in 5..7, runs the kernel with m=2 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26902
// Same n (5..7) and k (1..40 step 9) sweep as n_gt_4, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26920
// For n in 5..7 and k = 1..40 step 9, sweeps m over 1..2 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26940
// For n = 8 and 12, runs the kernel with m=2 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26957
// Same n (8, 12) and k (1..40 step 9) sweep as n_div_4, with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26975
// For n = 8 and 12 and k = 1..40 step 9, sweeps m over 1..2 with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26995
// Runs the full 2x4 tile (m = 2, n = 4) with ks set to 3 for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_val);
    tester.ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
27011
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks set
// to 3 and iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27032
// Runs m = 2 with ks set to 3 for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27050
// Runs m = 2 with ks set to 3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27068
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with
// cm_stride set to 7 and iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27089
// Runs the full 2x4 tile with ks set to 3 and a_offset set to 83 for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_val);
    tester.ks(3).a_offset(83);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
27106
// For k in {1, 10, 19, 28, 37} and zero_index in 0..1, runs the full 2x4
// tile with ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(k_val);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27126
// Single run at m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
27140
// Single run at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
27154
// Single run at m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
27168 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27169
27170
27171 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128 at
// m = 2, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
27184
// Single run at m = 2, n = 4, k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
27198
// With k = 8, sweeps n over 1..4 and m over 1..2, using iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(m_val).n(n_val).k(8).iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27216
// With n = 4 and k = 8, sweeps m over 1..2 using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(m_val).n(4).k(8).iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
27232
// With m = 2 and k = 8, sweeps n over 1..4 using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(n_val).k(8).iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
27248
// Runs the full 2x4 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_val);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
27263
// Sweeps k over 1..7, n over 1..4 and m over 1..2 with iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27283
// Runs the full 2x4 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_val);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
27298
// Sweeps k over 9..15, n over 1..4 and m over 1..2 with iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27318
// Runs the full 2x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_val);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
27333
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..2 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27353
// Runs m = 2 for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_val).k(k_val);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27370
// Runs m = 2 with cn_stride set to 7 for n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27388
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27408
// Runs m = 2 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_val).k(k_val);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27425
// Runs m = 2 with cn_stride set to 7 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27443
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27463
// Runs the full 2x4 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_val);
    tester.ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
27479
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks set
// to 3 and iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27500
// Runs m = 2 with ks set to 3 for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27518
// Runs m = 2 with ks set to 3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27536
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with
// cm_stride set to 7 and iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27557
// Runs the full 2x4 tile with ks set to 3 and a_offset set to 83 for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_val);
    tester.ks(3).a_offset(83);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
27574
// For k in {1, 10, 19, 28, 37} and zero_index in 0..1, runs the full 2x4
// tile with ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(k_val);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27594
// Single run at m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
27608
// Single run at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
27622
// Single run at m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
27636 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27637
27638
27639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128 at
// m = 3, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
27652
// Single run at m = 3, n = 4, k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
27666
// With k = 8, sweeps n over 1..4 and m over 1..3, using iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m_val).n(n_val).k(8).iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27684
// With n = 4 and k = 8, sweeps m over 1..3 using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m_val).n(4).k(8).iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
27700
// With m = 3 and k = 8, sweeps n over 1..4 using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n_val).k(8).iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
27716
// Runs the full 3x4 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_val);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
27731
// Sweeps k over 1..7, n over 1..4 and m over 1..3 with iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27751
// Runs the full 3x4 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_val);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
27766
// Sweeps k over 9..15, n over 1..4 and m over 1..3 with iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27786
// Runs the full 3x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_val);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
27801
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..3 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27821
// Runs m = 3 for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27838
// Runs m = 3 with cn_stride set to 7 for n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27856
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27876
// Runs m = 3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27893
// Runs m = 3 with cn_stride set to 7 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27911
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..3 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27931
// Runs the full 3x4 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_val);
    tester.ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
27947
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3 with ks set
// to 3 and iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
27968
// Runs m = 3 with ks set to 3 for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27986
// Runs m = 3 with ks set to 3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28004
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3 with
// cm_stride set to 7 and iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
28025
// Sweeps k over {1, 10, 19, 28, 37} with m = 3, n = 4, ks(3) and
// a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
28042
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..2 with m = 3,
// n = 4, ks(3) and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28062
// Single run with m = 3, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
28076
// Single run with m = 3, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
28090
// Single run with m = 3, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
28104 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28105
28106
28107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr (3), n = nr (4) and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28120
// Single run with m = 3, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28134
// Sweeps n over 1..4 and m over 1..3 at k = 8; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28152
// Sweeps m over 1..3 with n = 4 and k = 8; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28168
// Sweeps n over 1..4 with m = 3 and k = 8; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28184
// Sweeps k over 1..7 with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28199
// Sweeps k over 1..7, n over 1..4 and m over 1..3; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28219
// Sweeps k over 9..15 with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28234
// Sweeps k over 9..15, n over 1..4 and m over 1..3; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28269
// Sweeps k over 16, 24, ..., 80 (step 8), n over 1..4 and m over 1..3;
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28289
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28306
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 3 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28324
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3;
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28344
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28361
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 3 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28379
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..3;
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28399
// Sweeps k over {1, 10, 19, 28, 37} with m = 3, n = 4 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28415
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3 with
// ks(3); one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28436
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 3 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28454
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 3 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28472
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3 with
// cm_stride set to 7; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28493
// Sweeps k over {1, 10, 19, 28, 37} with m = 3, n = 4, ks(3) and
// a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28510
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..2 with m = 3,
// n = 4, ks(3) and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28530
// Single run with m = 3, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28544
// Single run with m = 3, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28558
// Single run with m = 3, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28572 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28573
28574
28575 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr (1), n = nr (4) and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28588
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28602
// Sweeps n over 1..4 and m over 1..1 at k = 8; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28620
// Sweeps m over 1..1 with n = 4 and k = 8; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28636
// Sweeps n over 1..4 with m = 1 and k = 8; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28652
// Sweeps k over 1..7 with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28667
// Sweeps k over 1..7, n over 1..4 and m over 1..1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28687
// Sweeps k over 9..15 with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28702
// Sweeps k over 9..15, n over 1..4 and m over 1..1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28722
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28737
// Sweeps k over 16, 24, ..., 80 (step 8), n over 1..4 and m over 1..1;
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28757
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28774
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 1 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28792
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..1;
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28812
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28829
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 1 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28847
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..1;
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28867
// Sweeps k over {1, 10, 19, 28, 37} with m = 1, n = 4 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28883
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 with
// ks(3); one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28904
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 1 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28922
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 1 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28940
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 with
// cm_stride set to 7; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28961
// Sweeps k over {1, 10, 19, 28, 37} with m = 1, n = 4, ks(3) and
// a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28978
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..0 with m = 1,
// n = 4, ks(3) and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28998
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29012
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29026
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29040 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29041
29042
29043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr (2), n = nr (4) and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29056
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29070
// k=8 with every (n, m) pair for n in 1..4 and m in 1..2; iterations is set
// to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29088
// k=8 and n=4 with m swept over 1..2; iterations is set to 1 for each run.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(m_dim).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29104
// k=8 and m=2 with n swept over 1..4; iterations is set to 1 for each run.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29120
// m=2 and n=4 with k swept over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29135
// k in 1..7 crossed with n in 1..4 and m in 1..2; iterations is set to 1 for
// each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29155
// m=2 and n=4 with k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29170
// k in 9..15 crossed with n in 1..4 and m in 1..2; iterations is set to 1 for
// each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29190
// m=2 and n=4 with k stepping through 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29205
// k in {16, 24, ..., 80} crossed with n in 1..4 and m in 1..2; iterations is
// set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29225
// m=2 with n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29242
// m=2 with n in 5..7 crossed with k in {1, 10, 19, 28, 37}, cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29260
// n in 5..7 crossed with k in {1, 10, 19, 28, 37} and m in 1..2; iterations
// is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29280
// m=2 with n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29297
// m=2 with n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29315
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in 1..2;
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29335
// m=2 and n=4 with ks set to 3, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29351
// ks set to 3, with k in {1, 10, 19, 28, 37} crossed with n in 1..4 and m in
// 1..2; iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29372
// m=2 and ks set to 3, with n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29390
// m=2 and ks set to 3, with n in {8, 12} crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29408
// cm_stride set to 7, with k in {1, 10, 19, 28, 37} crossed with n in 1..4
// and m in 1..2; iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29429
// m=2 and n=4 with ks set to 3 and a_offset set to 83, k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.ks(3).a_offset(83);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29446
// m=2 and n=4 with ks set to 3 and a_offset set to 83; k in
// {1, 10, 19, 28, 37} crossed with zero_index in 0..1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(k_dim);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29466
// Runs the 2x4c2s4 XOP kernel once with m=2, n=4, k=8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29480
// Runs the 2x4c2s4 XOP kernel once with m=2, n=4, k=8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29494
// Runs the 2x4c2s4 XOP kernel once with m=2, n=4, k=8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29508 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29509
29510
29511 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c2s4 AVX kernel once with m=3, n=4, k=8 and no further tester
// options set.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29524
// Runs the 3x4c2s4 AVX kernel once with m=3, n=4, k=8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29538
// k=8 with every (n, m) pair for n in 1..4 and m in 1..3; iterations is set
// to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29556
// k=8 and n=4 with m swept over 1..3; iterations is set to 1 for each run.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m_dim).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29572
// k=8 and m=3 with n swept over 1..4; iterations is set to 1 for each run.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29588
// m=3 and n=4 with k swept over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29603
// k in 1..7 crossed with n in 1..4 and m in 1..3; iterations is set to 1 for
// each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29623
// m=3 and n=4 with k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29638
// k in 9..15 crossed with n in 1..4 and m in 1..3; iterations is set to 1 for
// each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29658
// m=3 and n=4 with k stepping through 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29673
// k in {16, 24, ..., 80} crossed with n in 1..4 and m in 1..3; iterations is
// set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29693
// m=3 with n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29710
// m=3 with n in 5..7 crossed with k in {1, 10, 19, 28, 37}, cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29728
// n in 5..7 crossed with k in {1, 10, 19, 28, 37} and m in 1..3; iterations
// is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29748
// m=3 with n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29765
// m=3 with n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29783
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in 1..3;
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29803
// m=3 and n=4 with ks set to 3, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_dim);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29819
// ks set to 3, with k in {1, 10, 19, 28, 37} crossed with n in 1..4 and m in
// 1..3; iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29840
// m=3 and ks set to 3, with n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29858
// m=3 and ks set to 3, with n in {8, 12} crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29876
// cm_stride set to 7, with k in {1, 10, 19, 28, 37} crossed with n in 1..4
// and m in 1..3; iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
29897
// m=3 and n=4 with ks set to 3 and a_offset set to 127, k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_dim);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
29914
// m=3 and n=4 with ks set to 3 and a_offset set to 127; k in
// {1, 10, 19, 28, 37} crossed with zero_index in 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(4).k(k_dim);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
29934
// Runs the 3x4c2s4 AVX kernel once with m=3, n=4, k=8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29948
// Runs the 3x4c2s4 AVX kernel once with m=3, n=4, k=8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29962
// Runs the 3x4c2s4 AVX kernel once with m=3, n=4, k=8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29976 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29977
29978
29979 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4c2s4 AVX kernel once with m=4, n=4, k=8 and no further tester
// options set.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
29992
// Runs the 4x4c2s4 AVX kernel once with m=4, n=4, k=8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
30006
// k=8 with every (n, m) pair for n in 1..4 and m in 1..4; iterations is set
// to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
30024
// k=8 and n=4 with m swept over 1..4; iterations is set to 1 for each run.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_dim).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
30040
// k=8 and m=4 with n swept over 1..4; iterations is set to 1 for each run.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
30056
// m=4 and n=4 with k swept over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
30071
// k in 1..7 crossed with n in 1..4 and m in 1..4; iterations is set to 1 for
// each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
30091
// m=4 and n=4 with k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
30106
// k in 9..15 crossed with n in 1..4 and m in 1..4; iterations is set to 1 for
// each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
30126
// Sweeps k over the multiples of 8 from 16 through 80 with m = 4 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30141
// For each multiple of 8 from 16 through 80 used as k, runs every (n, m) pair
// with n in 1..4 and m in 1..4, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30161
// Runs n in 5..7 (above nr = 4, below 8) against k in {1, 10, 19, 28, 37}
// with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30178
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, 19, 28, 37}, m = 4), but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30196
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs every m in 1..4 with a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30216
// Runs n in {8, 12} (multiples of nr = 4) against k in {1, 10, 19, 28, 37}
// with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30233
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4), but
// with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30251
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs every m in 1..4 with a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30271
// Runs k in {1, 10, 19, 28, 37} with m = 4, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30287
// For k in {1, 10, 19, 28, 37}, runs every (n, m) pair with n in 1..4 and
// m in 1..4, with ks set to 3 and a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30308
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} with m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30326
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 4 and ks set
// to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30344
// For k in {1, 10, 19, 28, 37}, runs every (n, m) pair with n in 1..4 and
// m in 1..4, with cm_stride set to 7 and a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30365
// Runs k in {1, 10, 19, 28, 37} with m = 4, n = 4, ks set to 3 and a_offset
// set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30382
// For k in {1, 10, 19, 28, 37}, runs each zero_index in 0..3 with m = 4,
// n = 4, ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30402
// Single run with m = 4, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30416
// Single run with m = 4, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30430
// Single run with m = 4, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30444 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30445
30446
30447 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 4 and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
30460
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
30474
// With k = 8, runs every (n, m) pair with n in 1..4 and m = 1 (the m loop has
// a single value), using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30492
// With n = 4 and k = 8, runs every m in 1..1 (a single value), using a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30508
// With m = 1 and k = 8, runs every n in 1..4, using a single iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30524
// Sweeps k over 1..7 (every value below 8) with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30539
// For each k in 1..7, runs every (n, m) pair with n in 1..4 and m = 1, using a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30559
// Sweeps k over 9..15 (strictly between 8 and 16) with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30574
// For each k in 9..15, runs every (n, m) pair with n in 1..4 and m = 1, using
// a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30594
// Sweeps k over the multiples of 8 from 16 through 80 with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30609
// For each multiple of 8 from 16 through 80 used as k, runs every (n, m) pair
// with n in 1..4 and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30629
// Runs n in 5..7 (above nr = 4, below 8) against k in {1, 10, 19, 28, 37}
// with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30646
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, 19, 28, 37}, m = 1), but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30664
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs m = 1 (the m loop has a
// single value) with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30684
// Runs n in {8, 12} (multiples of nr = 4) against k in {1, 10, 19, 28, 37}
// with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30701
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1), but
// with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30719
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs m = 1 (the m loop has a
// single value) with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30739
// Runs k in {1, 10, 19, 28, 37} with m = 1, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30755
// For k in {1, 10, 19, 28, 37}, runs every (n, m) pair with n in 1..4 and
// m = 1, with ks set to 3 and a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30776
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30794
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 1 and ks set
// to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30812
// For k in {1, 10, 19, 28, 37}, runs every (n, m) pair with n in 1..4 and
// m = 1, with cm_stride set to 7 and a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30833
// Runs k in {1, 10, 19, 28, 37} with m = 1, n = 4, ks set to 3 and a_offset
// set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, a_offset) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30850
// For k in {1, 10, 19, 28, 37}, runs zero_index = 0 (the only value the inner
// loop produces) with m = 1, n = 4, ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, zero) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30870
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, qmin) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
30884
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, qmax) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
30898
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
30912 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30913
30914
30915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 2, n = 4 and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
30928
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
30942
// With k = 8, runs every (n, m) pair with n in 1..4 and m in 1..2, using a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30960
// With n = 4 and k = 8, runs every m in 1..2, using a single iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30976
// With m = 2 and k = 8, runs every n in 1..4, using a single iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30992
// Sweeps k over 1..7 (every value below 8) with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31007
// For each k in 1..7, runs every (n, m) pair with n in 1..4 and m in 1..2,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31027
// Sweeps k over 9..15 (strictly between 8 and 16) with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31042
// For each k in 9..15, runs every (n, m) pair with n in 1..4 and m in 1..2,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31062
// Sweeps k over the multiples of 8 from 16 through 80 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31077
// For each multiple of 8 from 16 through 80 used as k, runs every (n, m) pair
// with n in 1..4 and m in 1..2, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31097
// Runs n in 5..7 (above nr = 4, below 8) against k in {1, 10, 19, 28, 37}
// with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31114
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, 19, 28, 37}, m = 2), but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31132
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs every m in 1..2 with a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31152
// Runs n in {8, 12} (multiples of nr = 4) against k in {1, 10, 19, 28, 37}
// with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31169
// Case: m = 2, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31187
// Case: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1..2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31207
// Case: m = 2, n = 4, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31223
// Case: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1..2, ks = 3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31244
// Case: m = 2, n = 5..7, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31262
// Case: m = 2, n in {8, 12}, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31280
// Case: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1..2, cm_stride = 7, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31301
// Case: m = 2, n = 4, k in {1, 10, 19, 28, 37}, ks = 3, a_offset = 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, a_offset) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31318
// Case: m = 2, n = 4, k in {1, 10, 19, 28, 37}, ks = 3, a_offset = 83, zero_index = 0..1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, zero) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(depth)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_row)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31338
// Case: m = 2, n = 4, k = 8, qmin = 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, qmin) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31352
// Case: m = 2, n = 4, k = 8, qmax = 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, qmax) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31366
// Case: m = 2, n = 4, k = 8, cm_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31380 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31381
31382
31383 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Case: m = 3, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31396
// Case: m = 3, n = 4, k = 8, cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31410
// Case: n = 1..4, m = 1..3, k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31428
// Case: m = 1..3, n = 4, k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31444
// Case: n = 1..4, m = 3, k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31460
// Case: m = 3, n = 4, k = 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31475
// Case: k = 1..7, n = 1..4, m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31495
// Case: m = 3, n = 4, k = 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31510
// Case: k = 9..15, n = 1..4, m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31530
// Case: m = 3, n = 4, k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31545
// Case: k = 16, 24, ..., 80, n = 1..4, m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31565
// Case: m = 3, n = 5..7, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31582
// Case: m = 3, n = 5..7, k in {1, 10, 19, 28, 37}, cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31600
// Case: n = 5..7, k in {1, 10, 19, 28, 37}, m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31620
// Case: m = 3, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31637
// Case: m = 3, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31655
// Case: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31675
// Case: m = 3, n = 4, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31691
// Case: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1..3, ks = 3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31712
// Case: m = 3, n = 5..7, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31730
// Case: m = 3, n in {8, 12}, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31748
// Case: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1..3, cm_stride = 7, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31769
// Case: m = 3, n = 4, k in {1, 10, 19, 28, 37}, ks = 3, a_offset = 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31786
// Case: m = 3, n = 4, k in {1, 10, 19, 28, 37}, ks = 3, a_offset = 127, zero_index = 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(depth)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_row)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31806
// Case: m = 3, n = 4, k = 8, qmin = 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31820
// Case: m = 3, n = 4, k = 8, qmax = 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31834
// Case: m = 3, n = 4, k = 8, cm_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31848 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31849
31850
31851 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Case: m = 1, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
31864
// Case: m = 1, n = 4, k = 8, cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
31878
// Case: n = 1..4, m = 1 (single-pass loop), k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31896
// Case: m = 1 (single-pass loop), n = 4, k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31912
// Case: n = 1..4, m = 1, k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31928
// Case: m = 1, n = 4, k = 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31943
// Case: k = 1..7, n = 1..4, m = 1 (single-pass loop), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31963
// Case: m = 1, n = 4, k = 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31978
// Case: k = 9..15, n = 1..4, m = 1 (single-pass loop), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31998
// Case: m = 1, n = 4, k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32013
// Case: k = 16, 24, ..., 80, n = 1..4, m = 1 (single-pass loop), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32033
// Case: m = 1, n = 5..7, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32050
// Case: m = 1, n = 5..7, k in {1, 10, 19, 28, 37}, cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32068
// Case: n = 5..7, k in {1, 10, 19, 28, 37}, m = 1 (single-pass loop), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32088
// Case: m = 1, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32105
// Case: m = 1, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32123
// Case: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1 (single-pass loop), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32143
// Case: m = 1, n = 4, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32159
// Case: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1 (single-pass loop), ks = 3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32180
// Case: m = 1, n = 5..7, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32198
// Case: m = 1, n in {8, 12}, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32216
// Case: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1 (single-pass loop), cm_stride = 7, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32237
// Sweeps k in {1, 10, 19, 28, 37} at m = 1, n = 4 with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32254
// Sweeps k in {1, 10, 19, 28, 37} and zero_index 0 at m = 1, n = 4 with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(kc);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32274
// Single case: m = 1, n = 4, k = 8 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32288
// Single case: m = 1, n = 4, k = 8 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32302
// Single case: m = 1, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32316 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32317
32318
32319 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 2, n = 4, k = 8 with no optional tester settings.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32332
// Single case: m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32346
// Sweeps n in [1, 4] and m in [1, 2] at k = 8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32364
// Sweeps m in [1, 2] at n = 4, k = 8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32380
// Sweeps n in [1, 4] at m = 2, k = 8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32396
// Sweeps k in [1, 7] at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32411
// Sweeps k in [1, 7], n in [1, 4] and m in [1, 2]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32431
// Sweeps k in [9, 15] at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32446
// Sweeps k in [9, 15], n in [1, 4] and m in [1, 2]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32466
// Sweeps k over multiples of 8 from 16 to 80 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32481
// Sweeps k over multiples of 8 from 16 to 80, n in [1, 4] and m in [1, 2]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32501
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32518
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32536
// Sweeps n in [5, 7], k in {1, 10, 19, 28, 37} and m in [1, 2]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32556
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32573
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32591
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 2]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32611
// Sweeps k in {1, 10, 19, 28, 37} at m = 2, n = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32627
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2] with ks(3); one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32648
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32666
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32684
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2] with cm_stride(7); one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32705
// Sweeps k in {1, 10, 19, 28, 37} at m = 2, n = 4 with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32722
// Sweeps k in {1, 10, 19, 28, 37} and zero_index in {0, 1} at m = 2, n = 4 with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32742
// Single case: m = 2, n = 4, k = 8 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32756
// Single case: m = 2, n = 4, k = 8 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32770
// Single case: m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32784 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32785
32786
32787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 1, n = 4, k = 8 with no optional tester settings.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32800
// Single case: m = 1, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32814
// Sweeps n in [1, 4] and m = 1 at k = 8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32832
// Loops over m = 1 only, at n = 4, k = 8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32848
// Sweeps n in [1, 4] at m = 1, k = 8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32864
// Sweeps k in [1, 7] at m = 1, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32879
// Sweeps k in [1, 7], n in [1, 4] and m = 1; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32899
// Sweeps k in [9, 15] at m = 1, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32914
// Sweeps k in [9, 15], n in [1, 4] and m = 1; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32934
// Sweeps k over multiples of 8 from 16 to 80 at m = 1, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32949
// Sweeps k over multiples of 8 from 16 to 80, n in [1, 4] and m = 1; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32969
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32986
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} at m = 1 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33004
// Sweeps n in [5, 7], k in {1, 10, 19, 28, 37} and m = 1; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33024
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33041
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} at m = 1 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33059
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33079
// Sweeps k in {1, 10, 19, 28, 37} at m = 1, n = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33095
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m = 1 with ks(3); one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33116
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} at m = 1 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33134
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} at m = 1 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33152
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m = 1 with cm_stride(7); one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33173
// Sweeps k in {1, 10, 19, 28, 37} at m = 1, n = 4 with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33190
// Sweeps k in {1, 10, 19, 28, 37} and zero_index 0 at m = 1, n = 4 with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(kc);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33210
// Single case: m = 1, n = 4, k = 8 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33224
// Single case: m = 1, n = 4, k = 8 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33238
// Single case: m = 1, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33252 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33253
33254
33255 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 1, n = 4, k = 8 with no optional tester settings.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33268
// Single case: m = 1, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33282
// Sweeps n in [1, 4] and m = 1 at k = 8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33300
// Sweeps m over [1, 1] with n = 4 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(mm).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33316
// Sweeps n over [1, 4] with m = 1 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(nn).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33332
// Sweeps k over [1, 7] with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33347
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33367
// Sweeps k over [9, 15] with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33382
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33402
// Sweeps k over 16, 24, ..., 80 with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33417
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33437
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33454
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33472
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33492
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33509
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33527
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33547
// Sweeps k over 1, 10, ..., 37 with m = 1, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33563
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 1] with ks set to 3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33584
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33602
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33620
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 1] with cm_stride set to 7, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33641
// Sweeps k over 1, 10, ..., 37 with m = 1, n = 4, ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.ks(3).a_offset(43);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33658
// Sweeps k over 1, 10, ..., 37 and zero_index over [0, 0] with ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(kk);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33678
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33692
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33706
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33720 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33721
33722
33723 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 4, k = 8 and no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
33736
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
33750
// Sweeps n over [1, 4] and m over [1, 1] at k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 1; ++mm) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(mm).n(nn).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33768
// Sweeps m over [1, 1] with n = 4 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(mm).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
33784
// Sweeps n over [1, 4] with m = 1 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(nn).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
33800
// Sweeps k over [1, 7] with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
33815
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33835
// Sweeps k over [9, 15] with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
33850
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33870
// Sweeps k over 16, 24, ..., 80 with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
33885
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33905
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33922
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33940
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33960
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33977
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33995
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34015
// Sweeps k over 1, 10, ..., 37 with m = 1, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
34031
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 1] with ks set to 3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34052
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
34070
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
34088
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 1] with cm_stride set to 7, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34109
// Sweeps k over 1, 10, ..., 37 with m = 1, n = 4, ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, a_offset) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kk);
    tester.ks(3).a_offset(43);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
34126
// Sweeps k over 1, 10, ..., 37 and zero_index over [0, 0] with ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, zero) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(kk);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
34146
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, qmin) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
34160
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, qmax) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
34174
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
34188 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34189
34190
34191 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 2, n = 4, k = 8 and no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
34204
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
34218
// Sweeps n over [1, 4] and m over [1, 2] at k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(mm).n(nn).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
34236
// Sweeps m over [1, 2] with n = 4 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(mm).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
34252
// Sweeps n over [1, 4] with m = 2 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(nn).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
34268
// Sweeps k over [1, 7] with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
34283
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34303
// Sweeps k over [9, 15] with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
34318
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34338
// Sweeps k over 16, 24, ..., 80 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kk);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
34353
// Multiples of 8 for k (16..80) crossed with every m x n combination inside
// the 2x4 tile, with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kdim = 16; kdim <= 80; kdim += 8) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 2; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34373
// n swept over 5..7 (above nr = 4) with m = 2 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(ndim).k(kdim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34390
// Same n (5..7) and k ({1, 10, ..., 37}) sweep as n_gt_4, with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(ndim).k(kdim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34408
// n in 5..7 and k in {1, 10, ..., 37} crossed with m in 1..2, with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      for (uint32_t mdim = 1; mdim <= 2; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34428
// n in {8, 12} (multiples of nr = 4) with m = 2 and k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(ndim).k(kdim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34445
// Same n ({8, 12}) and k ({1, 10, ..., 37}) sweep as n_div_4, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(ndim).k(kdim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34463
// n in {8, 12} and k in {1, 10, ..., 37} crossed with m in 1..2, with a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      for (uint32_t mdim = 1; mdim <= 2; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34483
// Full 2x4 tile with ks set to 3 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kdim);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34499
// ks = 3 with k in {1, 10, ..., 37}, crossed with every m x n combination
// inside the 2x4 tile; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 2; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34520
// n in 5..7 with m = 2, ks = 3 and k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(ndim).k(kdim);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34538
// n in {8, 12} with m = 2, ks = 3 and k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(ndim).k(kdim);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34556
// cm_stride = 7 with k in {1, 10, ..., 37}, crossed with every m x n
// combination inside the 2x4 tile; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 2; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34577
// Full 2x4 tile with ks = 3 and a_offset = 83, for k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, a_offset) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kdim);
    tester.ks(3);
    tester.a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34594
// Full 2x4 tile with ks = 3 and a_offset = 83; zero_index takes each value in
// 0..1 for every k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, zero) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    for (uint32_t zidx = 0; zidx < 2; ++zidx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(kdim);
      tester.ks(3);
      tester.a_offset(83);
      tester.zero_index(zidx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34614
// Full 2x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, qmin) {
  TEST_REQUIRES_X86_SSSE3;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34628
// Full 2x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, qmax) {
  TEST_REQUIRES_X86_SSSE3;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34642
// Full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSSE3;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34656 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34657
34658
34659 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile with k equal to kr (8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34672
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34686
// Every m x n combination inside the 3x4 tile at k = 8, with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
    for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(mdim).n(ndim).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34704
// m swept over 1..3 with n = 4 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(mdim).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34720
// n swept over 1..4 with m = 3 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(ndim).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34736
// Full 3x4 tile with k swept over 1..7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 1; kdim < 8; ++kdim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kdim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34751
// Sweeps k over 1..7 for every m x n combination inside the 3x4 tile, with a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 1; kdim < 8; ++kdim) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34771
// Full 3x4 tile with k swept over 9..15 (just above kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 9; kdim < 16; ++kdim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kdim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34786
// Sweeps k over 9..15 for every m x n combination inside the 3x4 tile, with a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 9; kdim < 16; ++kdim) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34806
// Full 3x4 tile with k stepping through the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 16; kdim <= 80; kdim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kdim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34821
// Multiples of 8 for k (16..80) crossed with every m x n combination inside
// the 3x4 tile, with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 16; kdim <= 80; kdim += 8) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34841
// n swept over 5..7 (above nr = 4) with m = 3 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(ndim).k(kdim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34858
// Same n (5..7) and k ({1, 10, ..., 37}) sweep as n_gt_4, with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(ndim).k(kdim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34876
// n in 5..7 and k in {1, 10, ..., 37} crossed with m in 1..3, with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34896
// n in {8, 12} (multiples of nr = 4) with m = 3 and k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(ndim).k(kdim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34913
// Same n ({8, 12}) and k ({1, 10, ..., 37}) sweep as n_div_4, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(ndim).k(kdim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34931
// n in {8, 12} and k in {1, 10, ..., 37} crossed with m in 1..3, with a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34951
// Full 3x4 tile with ks set to 3 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kdim);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34967
// ks = 3 with k in {1, 10, ..., 37}, crossed with every m x n combination
// inside the 3x4 tile; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34988
// n in 5..7 with m = 3, ks = 3 and k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(ndim).k(kdim);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35006
// n in {8, 12} with m = 3, ks = 3 and k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(ndim).k(kdim);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35024
// cm_stride = 7 with k in {1, 10, ..., 37}, crossed with every m x n
// combination inside the 3x4 tile; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 3; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35045
// Full 3x4 tile with ks = 3 and a_offset = 127, for k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kdim);
    tester.ks(3);
    tester.a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
35062
// Full 3x4 tile with ks = 3 and a_offset = 127; zero_index takes each value
// in 0..2 for every k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kdim = 1; kdim <= 40; kdim += 9) {
    for (uint32_t zidx = 0; zidx < 3; ++zidx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(kdim);
      tester.ks(3);
      tester.a_offset(127);
      tester.zero_index(zidx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35082
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35096
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35110
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35124 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
35125
35126
35127 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile with k equal to kr (8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35140
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35154
// n swept over 1..4 at k = 8; the m loop covers only m = 1 because mr is 1.
// A single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
    for (uint32_t mdim = 1; mdim <= 1; ++mdim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(mdim).n(ndim).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35172
// m loop (only m = 1, since mr is 1) with n = 4 and k = 8; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mdim = 1; mdim <= 1; ++mdim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(mdim).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35188
// n swept over 1..4 with m = 1 and k = 8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(ndim).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35204
// Full 1x4 tile with k swept over 1..7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kdim = 1; kdim < 8; ++kdim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kdim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35219
// Sweeps k over 1..7 and n over 1..4; the m loop covers only m = 1 because mr
// is 1. A single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kdim = 1; kdim < 8; ++kdim) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 1; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35239
// Full 1x4 tile with k swept over 9..15 (just above kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kdim = 9; kdim < 16; ++kdim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kdim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35254
// Sweeps k over 9..15 and n over 1..4; the m loop covers only m = 1 because
// mr is 1. A single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kdim = 9; kdim < 16; ++kdim) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 1; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35274
// Full 1x4 tile with k stepping through the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kdim = 16; kdim <= 80; kdim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kdim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35289
// Multiples of 8 for k (16..80) crossed with n in 1..4; the m loop covers
// only m = 1 because mr is 1. A single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kdim = 16; kdim <= 80; kdim += 8) {
    for (uint32_t ndim = 1; ndim <= 4; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 1; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35309
// n swept over 5..7 (above nr = 4) with m = 1 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(ndim).k(kdim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35326
// Same n (5..7) and k ({1, 10, ..., 37}) sweep as n_gt_4, with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(ndim).k(kdim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35344
// n in 5..7 and k in {1, 10, ..., 37}; the m loop covers only m = 1 because
// mr is 1. A single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ndim = 5; ndim < 8; ++ndim) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      for (uint32_t mdim = 1; mdim <= 1; ++mdim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mdim).n(ndim).k(kdim);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35364
// n in {8, 12} (multiples of nr = 4) with m = 1 and k in {1, 10, ..., 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(ndim).k(kdim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35381
// Same n ({8, 12}) and k ({1, 10, ..., 37}) sweep as n_div_4, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ndim = 8; ndim <= 12; ndim += 4) {
    for (size_t kdim = 1; kdim <= 40; kdim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(ndim).k(kdim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35399
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..1, using
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35419
// Sweeps k over {1, 10, 19, 28, 37} on the full 1x4 tile with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35435
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 with ks(3)
// and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35456
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 1 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35474
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 1 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35492
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 with
// cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35513
// Sweeps k over {1, 10, 19, 28, 37} on the full 1x4 tile with ks(3) and
// a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35530
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..0 on the full 1x4
// tile with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35550
// Single run on the full 1x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
35564
// Single run on the full 1x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
35578
// Single run on the full 1x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
35592 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
35593
35594
35595 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full 2x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
35608
// Single run on the full 2x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
35622
// Sweeps n over 1..4 and m over 1..2 at k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35640
// Sweeps m over 1..2 at n = 4, k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35656
// Sweeps n over 1..4 at m = 2, k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35672
// Sweeps k over 1..7 on the full 2x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35687
// Sweeps k over 1..7, n over 1..4 and m over 1..2 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35707
// Sweeps k over 9..15 on the full 2x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35722
// Sweeps k over 9..15, n over 1..4 and m over 1..2 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35742
// Sweeps k over 16..80 in steps of 8 on the full 2x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35757
// Sweeps k over 16..80 in steps of 8, n over 1..4 and m over 1..2 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35777
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35794
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 2 and
// cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35812
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35832
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35849
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 2 and
// cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35867
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35887
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35903
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks(3)
// and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35924
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 2 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35942
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 2 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35960
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with
// cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35981
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks(3) and
// a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
35998
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..1 on the full 2x4
// tile with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36018
// Single run on the full 2x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36032
// Single run on the full 2x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36046
// Single run on the full 2x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36060 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36061
36062
36063 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full 1x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36076
// Single run on the full 1x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36090
// Sweeps n over 1..4 and m over 1..1 at k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36108
// Sweeps m over 1..1 at n = 4, k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36124
// Sweeps n over 1..4 at m = 1, k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36140
// Sweeps k over 1..7 on the full 1x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36155
// Sweeps k over 1..7, n over 1..4 and m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36175
// Sweeps k over 9..15 on the full 1x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36190
// Sweeps k over 9..15, n over 1..4 and m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36210
// Sweeps k over 16..80 in steps of 8 on the full 1x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36225
// Sweeps k over 16..80 in steps of 8, n over 1..4 and m over 1..1 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36245
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36262
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 1 and
// cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36280
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..1 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36300
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36317
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 1 and
// cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36335
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..1 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36355
// Sweeps k over {1, 10, 19, 28, 37} on the full 1x4 tile with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36371
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 with ks(3)
// and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36392
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 1 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36410
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 1 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36428
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 with
// cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36449
// 1x4c8 XOP ld128: full tile (m=1, n=4), k in {1, 10, 19, 28, 37},
// with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
36466
// 1x4c8 XOP ld128: full tile (m=1, n=4), k in {1, 10, 19, 28, 37},
// with ks(3), a_offset(43) and zero_index taking each value in 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36486
// 1x4c8 XOP ld128: full tile (m=1, n=4, k=8) with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
36500
// 1x4c8 XOP ld128: full tile (m=1, n=4, k=8) with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
36514
// 1x4c8 XOP ld128: full tile (m=1, n=4, k=8) with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
36528 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36529
36530
36531 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c8 XOP ld128: one full-tile run with m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
36544
// 2x4c8 XOP ld128: full tile (m=2, n=4, k=8) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
36558
// 2x4c8 XOP ld128: k=8, n in 1..4 (outer), m in 1..2 (inner),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36576
// 2x4c8 XOP ld128: k=8, n=4, m in 1..2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
36592
// 2x4c8 XOP ld128: k=8, m=2, n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
36608
// 2x4c8 XOP ld128: full tile (m=2, n=4), k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
36623
// 2x4c8 XOP ld128: k in 1..7, n in 1..4, m in 1..2 (nested in that order),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36643
// 2x4c8 XOP ld128: full tile (m=2, n=4), k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
36658
// 2x4c8 XOP ld128: k in 9..15, n in 1..4, m in 1..2 (nested in that order),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36678
// 2x4c8 XOP ld128: full tile (m=2, n=4), k in 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
36693
// 2x4c8 XOP ld128: k in 16, 24, ..., 80, n in 1..4, m in 1..2 (nested in
// that order), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36713
// 2x4c8 XOP ld128: m=2, n in 5..7 (outer), k in {1, 10, 19, 28, 37} (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36730
// 2x4c8 XOP ld128: m=2, n in 5..7, k in {1, 10, 19, 28, 37}, cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36748
// 2x4c8 XOP ld128: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2 (nested
// in that order), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36768
// 2x4c8 XOP ld128: m=2, n in {8, 12} (outer), k in {1, 10, 19, 28, 37} (inner).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36785
// 2x4c8 XOP ld128: m=2, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36803
// 2x4c8 XOP ld128: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2 (nested
// in that order), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36823
// 2x4c8 XOP ld128: full tile (m=2, n=4), k in {1, 10, 19, 28, 37}, ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
36839
// 2x4c8 XOP ld128: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2 (nested
// in that order), with ks(3) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36860
// 2x4c8 XOP ld128: m=2, n in 5..7, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36878
// 2x4c8 XOP ld128: m=2, n in {8, 12}, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36896
// 2x4c8 XOP ld128: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2 (nested
// in that order), with cm_stride(7) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36917
// 2x4c8 XOP ld128: full tile (m=2, n=4), k in {1, 10, 19, 28, 37},
// with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
36934
// 2x4c8 XOP ld128: full tile (m=2, n=4), k in {1, 10, 19, 28, 37},
// with ks(3), a_offset(83) and zero_index taking each value in 0..1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36954
// 2x4c8 XOP ld128: full tile (m=2, n=4, k=8) with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
36968
// 2x4c8 XOP ld128: full tile (m=2, n=4, k=8) with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
36982
// 2x4c8 XOP ld128: full tile (m=2, n=4, k=8) with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
36996 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36997
36998
36999 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 AVX ld128: one full-tile run with m=3, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
37012
// 3x4c8 AVX ld128: full tile (m=3, n=4, k=8) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
37026
// 3x4c8 AVX ld128: k=8, n in 1..4 (outer), m in 1..3 (inner),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37044
// 3x4c8 AVX ld128: k=8, n=4, m in 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
37060
// 3x4c8 AVX ld128: k=8, m=3, n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
37076
// 3x4c8 AVX ld128: full tile (m=3, n=4), k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
37091
// 3x4c8 AVX ld128: k in 1..7, n in 1..4, m in 1..3 (nested in that order),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
37111
// 3x4c8 AVX ld128: full tile (m=3, n=4), k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
37126
// 3x4c8 AVX ld128: k in 9..15, n in 1..4, m in 1..3 (nested in that order),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
37146
// 3x4c8 AVX ld128: full tile (m=3, n=4), k in 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
37161
// 3x4c8 AVX ld128: k in 16, 24, ..., 80, n in 1..4, m in 1..3 (nested in
// that order), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
37181
// 3x4c8 AVX ld128: m=3, n in 5..7 (outer), k in {1, 10, 19, 28, 37} (inner).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37198
// 3x4c8 AVX ld128: m=3, n in 5..7, k in {1, 10, 19, 28, 37}, cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37216
// 3x4c8 AVX ld128: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..3 (nested
// in that order), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
37236
// 3x4c8 AVX ld128: m=3, n in {8, 12} (outer), k in {1, 10, 19, 28, 37} (inner).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37253
// 3x4c8 AVX ld128: m=3, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37271
// 3x4c8 AVX ld128: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..3 (nested
// in that order), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
37291
// 3x4c8 AVX ld128: full tile (m=3, n=4), k in {1, 10, 19, 28, 37}, ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
37307
// 3x4c8 AVX ld128: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3 (nested
// in that order), with ks(3) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
37328
// 3x4c8 AVX ld128: m=3, n in 5..7, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37346
// 3x4c8 AVX ld128: m=3, n in {8, 12}, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37364
// 3x4c8 AVX ld128: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3 (nested
// in that order), with cm_stride(7) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
37385
// 3x4c8 AVX ld128: full tile (m=3, n=4), k in {1, 10, 19, 28, 37},
// with ks(3) and a_offset(127).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
37402
// 3x4c8 AVX ld128: full tile (m=3, n=4), k in {1, 10, 19, 28, 37},
// with ks(3), a_offset(127) and zero_index taking each value in 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37422
// 3x4c8 AVX ld128: full tile (m=3, n=4, k=8) with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
37436
// 3x4c8 AVX ld128: full tile (m=3, n=4, k=8) with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
37450
// 3x4c8 AVX ld128: full tile (m=3, n=4, k=8) with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
37464 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
37465
37466
37467 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x8c8 AVX2: one full-tile run with m=2, n=8, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
}
37480
// 2x8c8 AVX2: full tile (m=2, n=8, k=8) with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
}
37494
// 2x8c8 AVX2: k=8, n in 1..8 (outer), m in 1..2 (inner),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37512
// 2x8c8 AVX2: k=8, n=8, m in 1..2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
37528
// Varies n over [1, 8] with m=2 and k=8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(n).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37544
// Varies k over [1, 7] with m=2 and n=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37559
// For k in [1, 7], runs every (n, m) pair with n in [1, 8] and m in [1, 2], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37579
// Varies k over [9, 15] with m=2 and n=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37594
// For k in [9, 15], runs every (n, m) pair with n in [1, 8] and m in [1, 2], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37614
// Varies k over 16, 24, ..., 80 with m=2 and n=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37629
// For k in 16, 24, ..., 80, runs every (n, m) pair with n in [1, 8] and m in [1, 2], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37649
// For n in [9, 15], varies k over 1, 10, ..., 37 with m=2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37666
// For n in [9, 15], varies k over 1, 10, ..., 37 with m=2 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37684
// For n in [9, 15] and k in 1, 10, ..., 37, varies m over [1, 2], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37704
// For n in {16, 24}, varies k over 1, 10, ..., 37 with m=2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37721
// For n in {16, 24}, varies k over 1, 10, ..., 37 with m=2 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37739
// For n in {16, 24} and k in 1, 10, ..., 37, varies m over [1, 2], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37759
// Varies k over 1, 10, ..., 37 with m=2, n=8 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37775
// For k in 1, 10, ..., 37, runs every (n, m) pair with n in [1, 8] and m in [1, 2],
// with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37796
// For n in [9, 15], varies k over 1, 10, ..., 37 with m=2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37814
// For n in {16, 24}, varies k over 1, 10, ..., 37 with m=2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37832
// For k in 1, 10, ..., 37, runs every (n, m) pair with n in [1, 8] and m in [1, 2],
// with cm_stride set to 11 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).cm_stride(11).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37853
// Varies k over 1, 10, ..., 37 with m=2, n=8, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k).ks(3).a_offset(83);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37870
// For k in 1, 10, ..., 37, varies zero_index over [0, 1] with m=2, n=8,
// ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(8).k(k).ks(3).a_offset(83).zero_index(mz);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37890
// Runs the 2x8c8 AVX2 kernel once with m=2, n=8, k=8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37904
// Runs the 2x8c8 AVX2 kernel once with m=2, n=8, k=8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37918
// Runs the 2x8c8 AVX2 kernel once with m=2, n=8, k=8 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).cm_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37932 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
37933
37934
37935 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x8c8 AVX2 kernel once with m=3, n=8, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37948
// Runs the 3x8c8 AVX2 kernel once with m=3, n=8, k=8 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).cn_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37962
// Runs every (n, m) pair with n in [1, 8] and m in [1, 3] at k=8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37980
// Varies m over [1, 3] with n=8 and k=8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(m).n(8).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37996
// Varies n over [1, 8] with m=3 and k=8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(n).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
38012
// Varies k over [1, 7] with m=3 and n=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
38027
// For k in [1, 7], runs every (n, m) pair with n in [1, 8] and m in [1, 3], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38047
// Varies k over [9, 15] with m=3 and n=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
38062
// For k in [9, 15], runs every (n, m) pair with n in [1, 8] and m in [1, 3], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38082
// Varies k over 16, 24, ..., 80 with m=3 and n=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
38097
// For k in 16, 24, ..., 80, runs every (n, m) pair with n in [1, 8] and m in [1, 3], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38117
// For n in [9, 15], varies k over 1, 10, ..., 37 with m=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
38134
// For n in [9, 15], varies k over 1, 10, ..., 37 with m=3 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
38152
// For n in [9, 15] and k in 1, 10, ..., 37, varies m over [1, 3], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38172
// For n in {16, 24}, varies k over 1, 10, ..., 37 with m=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
38189
// For n in {16, 24}, varies k over 1, 10, ..., 37 with m=3 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
38207
// For n in {16, 24} and k in 1, 10, ..., 37, varies m over [1, 3], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38227
// Varies k over 1, 10, ..., 37 with m=3, n=8 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
38243
// For k in 1, 10, ..., 37, runs every (n, m) pair with n in [1, 8] and m in [1, 3],
// with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38264
// For n in [9, 15], varies k over 1, 10, ..., 37 with m=3 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n).k(k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
38282
// For n in {16, 24}, varies k over 1, 10, ..., 37 with m=3 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n).k(k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
38300
// For k in 1, 10, ..., 37, runs every (n, m) pair with n in [1, 8] and m in [1, 3],
// with cm_stride set to 11 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).cm_stride(11).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                    xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38321
// Varies k over 1, 10, ..., 37 with m=3, n=8, ks set to 3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k).ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
38338
// For k in 1, 10, ..., 37, varies zero_index over [0, 2] with m=3, n=8,
// ks set to 3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(8).k(k).ks(3).a_offset(127).zero_index(mz);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
38358
// Runs the 3x8c8 AVX2 kernel once with m=3, n=8, k=8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
38372
// Runs the 3x8c8 AVX2 kernel once with m=3, n=8, k=8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
38386
// Runs the 3x8c8 AVX2 kernel once with m=3, n=8, k=8 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).cm_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
38400 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
38401
38402
38403 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 1x16c8 AVX512SKX kernel once with m=1, n=16, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(16).kr(8).sr(1);
  tester.m(1).n(16).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
38416
// Runs the 1x16c8 AVX512SKX kernel once with m=1, n=16, k=8 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(16).kr(8).sr(1);
  tester.m(1).n(16).k(8).cn_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
38430
// Runs every (n, m) pair with n in [1, 16] and m in [1, 1] at k=8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(16).kr(8).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                  xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
38448
// Varies m over [1, 1] (a single value) with n=16 and k=8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(16).kr(8).sr(1);
    tester.m(m).n(16).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
38464
// Varies n over [1, 16] with m=1 and k=8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(16).kr(8).sr(1);
    tester.m(1).n(n).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
38480
// Varies k over [1, 7] with m=1 and n=16.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(16).kr(8).sr(1);
    tester.m(1).n(16).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
38495
// For k in [1, 7], runs every (n, m) pair with n in [1, 16] and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(16).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                    xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38515
// Varies k over [9, 15] with m=1 and n=16.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(16).kr(8).sr(1);
    tester.m(1).n(16).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
38530
// For k in [9, 15], runs every (n, m) pair with n in [1, 16] and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(16).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                    xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38550
// Varies k over 16, 24, ..., 80 with m=1 and n=16.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(16).kr(8).sr(1);
    tester.m(1).n(16).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
38565
// For k in 16, 24, ..., 80, runs every (n, m) pair with n in [1, 16] and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(16).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                    xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38585
// For n in [17, 31], varies k over 1, 10, ..., 37 with m=1.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(16).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                  xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
38602
// Same n = 17..31 / k = 1, 10, ..., 37 sweep at m = 1, with cn_stride set
// to 19.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38620
// n = 17..31 crossed with k = 1, 10, ..., 37; the innermost m loop makes a
// single pass (m = 1). One iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38640
// Uses n = 32 and n = 48 (multiples of nr = 16) at m = 1, with k stepping
// through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38657
// n = 32 and 48 at m = 1 over k = 1, 10, ..., 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38675
// n = 32 and 48 crossed with k = 1, 10, ..., 37; the m loop yields only
// m = 1. One iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38695
// Runs the 1x16 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
38711
// With ks(3): k = 1, 10, ..., 37 crossed with n = 1..16 (m loop yields only
// m = 1), one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38732
// With ks(3): n = 17..31 at m = 1 over k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38750
// With ks(3): n = 32 and 48 at m = 1 over k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38768
// With cm_stride(19): k = 1, 10, ..., 37 crossed with n = 1..16 (m loop
// yields only m = 1), one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38789
// Runs the 1x16 tile with ks(3) and a_offset(43) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
38806
// With ks(3) and a_offset(43): for k = 1, 10, ..., 37, passes each zero
// index below mr to zero_index (only index 0 here, since mr = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38826
// Single run on the 1x16 tile at k = 8 with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
38840
// Single run on the 1x16 tile at k = 8 with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
38854
// Single run on the 1x16 tile at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
38868 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
38869
38870
38871 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr = 2, n = nr = 16 and k = kr = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
38884
// Single run on the 2x16 tile at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
38898
// At k = 8, visits every (m, n) with n in 1..16 and m in 1..2, one
// iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38916
// At k = 8 and n = 16, varies m over 1..2, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
38932
// At k = 8 and m = 2, varies n over 1..16, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
38948
// Runs the 2x16 tile for k = 1..7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
38963
// k = 1..7 crossed with n = 1..16 and m = 1..2, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38983
// Runs the 2x16 tile for k = 9..15 (above kr = 8, below 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
38998
// k = 9..15 crossed with n = 1..16 and m = 1..2, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39018
// Runs the 2x16 tile for k = 16, 24, ..., 80, i.e. multiples of kr = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
39033
// k = 16, 24, ..., 80 crossed with n = 1..16 and m = 1..2, one iteration
// per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39053
// Covers n = 17..31 (above nr = 16) at m = 2, with k stepping through
// 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39070
// n = 17..31 at m = 2 over k = 1, 10, ..., 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39088
// n = 17..31 crossed with k = 1, 10, ..., 37 and m = 1..2, one iteration
// per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39108
// Uses n = 32 and n = 48 (multiples of nr = 16) at m = 2, with k stepping
// through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39125
// n = 32 and 48 at m = 2 over k = 1, 10, ..., 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39143
// n = 32 and 48 crossed with k = 1, 10, ..., 37 and m = 1..2, one iteration
// per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39163
// Runs the 2x16 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
39179
// With ks(3): k = 1, 10, ..., 37 crossed with n = 1..16 and m = 1..2, one
// iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39200
// With ks(3): n = 17..31 at m = 2 over k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39218
// With ks(3): n = 32 and 48 at m = 2 over k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39236
// With cm_stride(19): k = 1, 10, ..., 37 crossed with n = 1..16 and
// m = 1..2, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39257
// Runs the 2x16 tile with ks(3) and a_offset(83) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
39274
// With ks(3) and a_offset(83): for k = 1, 10, ..., 37, passes each index in
// 0..1 (below mr = 2) to zero_index.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39294
// Single run on the 2x16 tile at k = 8 with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
39308
// Single run on the 2x16 tile at k = 8 with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
39322
// Single run on the 2x16 tile at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
39336 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
39337
39338
39339 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr = 4, n = nr = 16 and k = kr = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(8).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
39352
// Single run on the 4x16 tile at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(8).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
39366
// At k = 8, visits every (m, n) with n in 1..16 and m in 1..4, one
// iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39384
// At k = 8 and n = 16, varies m over 1..4, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
39400
// At k = 8 and m = 4, varies n over 1..16, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
39416
// Runs the 4x16 tile for k = 1..7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
39431
// k = 1..7 crossed with n = 1..16 and m = 1..4, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39451
// Runs the 4x16 tile for k = 9..15 (above kr = 8, below 16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
39466
// k = 9..15 crossed with n = 1..16 and m = 1..4, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39486
// Runs the 4x16 tile for k = 16, 24, ..., 80, i.e. multiples of kr = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
39501
// k = 16, 24, ..., 80 crossed with n = 1..16 and m = 1..4, one iteration
// per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39521
// Covers n = 17..31 (above nr = 16) at m = 4, with k stepping through
// 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39538
// n = 17..31 at m = 4 over k = 1, 10, ..., 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39556
// n = 17..31 crossed with k = 1, 10, ..., 37 and m = 1..4, one iteration
// per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39576
// Uses n = 32 and n = 48 (multiples of nr = 16) at m = 4, with k stepping
// through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39593
// n = 32 and 48 at m = 4 over k = 1, 10, ..., 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39611
// n = 32 and 48 crossed with k = 1, 10, ..., 37 and m = 1..4, one iteration
// per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39631
// m = 4, n = 16 with ks set to 3; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
39647
// ks = 3 over every (m, n) in 1..4 x 1..16; k = 1..37 step 9; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39668
// ks = 3 with n = 17..31; m fixed at 4; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39686
// ks = 3 with n = 32 and 48; m fixed at 4; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39704
// cm_stride = 19 over every (m, n) in 1..4 x 1..16; k = 1..37 step 9; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39725
// m = 4, n = 16, ks = 3 with a_offset set to 163; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
39742
// As a_offset (ks = 3, a_offset = 163), additionally sweeping zero_index over 0..3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39762
// Single 4x16 case at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(8).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
39776
// Single 4x16 case at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(8).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
39790
// Single 4x16 case at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(8).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
39804 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
39805
39806
39807 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 1x4 case at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
39819
// Single 1x4 case at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
39832
// k = 8 over n = 1..4 and m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39849
// k = 8, n = 4, with m looping over 1..1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
39864
// k = 8, m = 1, with n looping over 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
39879
// m = 1, n = 4 with k = 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
39893
// k = 1..7 over n = 1..4 and m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39912
// m = 1, n = 4 with k = 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
39926
// k = 9..15 over n = 1..4 and m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39945
// m = 1, n = 4 with k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
39959
// k = 16, 24, ..., 80 over n = 1..4 and m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39978
// m = 1 with n = 5..7; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39994
// m = 1 with n = 5..7 and cn_stride set to 7; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40011
// n = 5..7; k = 1..37 step 9; m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40030
// m = 1 with n = 8 and 12; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40046
// m = 1 with n = 8 and 12 and cn_stride set to 7; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40063
// n = 8 and 12; k = 1..37 step 9; m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40082
// m = 1, n = 4 with ks set to 3; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40097
// ks = 3 over n = 1..4 and m = 1; k = 1..37 step 9; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40117
// ks = 3 with n = 5..7; m fixed at 1; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40134
// ks = 3 with n = 8 and 12; m fixed at 1; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40151
// cm_stride = 7 over n = 1..4 and m = 1; k = 1..37 step 9; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40171
// m = 1, n = 4, ks = 3 with a_offset set to 43; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40187
// As a_offset (ks = 3, a_offset = 43), with zero_index looping over the single value 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40206
// Single 1x4 case at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
40219
// Single 1x4 case at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
40232
// Single 1x4 case at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
40245 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40246
40247
40248 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 1x4 case at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
40260
// Single 1x4 case at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
40273
// k = 8 over n = 1..4 and m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40290
// k = 8, n = 4, with m looping over 1..1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40305
// k = 8, m = 1, with n looping over 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40320
// m = 1, n = 4 with k = 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40334
// k = 1..7 over n = 1..4 and m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40353
// m = 1, n = 4 with k = 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40367
// k = 9..15 over n = 1..4 and m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40386
// m = 1, n = 4 with k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40400
// k = 16, 24, ..., 80 over n = 1..4 and m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40419
// m = 1 with n = 5..7; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40435
// m = 1 with n = 5..7 and cn_stride set to 7; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40452
// n = 5..7; k = 1..37 step 9; m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40471
// m = 1 with n = 8 and 12; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40487
// m = 1 with n = 8 and 12 and cn_stride set to 7; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40504
// n = 8 and 12; k = 1..37 step 9; m = 1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40523
// m = 1, n = 4 with ks set to 3; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40538
// ks = 3 over n = 1..4 and m = 1; k = 1..37 step 9; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40558
// ks = 3 with n = 5..7; m fixed at 1; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40575
// ks = 3 with n = 8 and 12; m fixed at 1; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40592
// cm_stride = 7 over n = 1..4 and m = 1; k = 1..37 step 9; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
40612
// m = 1, n = 4, ks = 3 with a_offset set to 43; k = 1..37 step 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
40628
// Sweeps k over 1, 10, ..., 37 and zero_index over 0..0 at m = 1, n = 4,
// with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40647
// Single run at m = 1, n = 4, k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40660
// Single run at m = 1, n = 4, k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40673
// Single run at m = 1, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40686 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40687
40688
40689 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 2, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40701
// Single run at m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40714
// Sweeps n over 1..4 and m over 1..2 at k = 8; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40731
// Sweeps m over 1..2 at n = 4, k = 8; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40746
// Sweeps n over 1..4 at m = 2, k = 8; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40761
// Sweeps k over 1..7 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40775
// Sweeps k over 1..7, n over 1..4 and m over 1..2; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40794
// Sweeps k over 9..15 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40808
// Sweeps k over 9..15, n over 1..4 and m over 1..2; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40827
// Sweeps k over 16, 24, ..., 80 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40841
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..2;
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40860
// Sweeps n over 5..7 and k over 1, 10, ..., 37 at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40876
// Sweeps n over 5..7 and k over 1, 10, ..., 37 at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40893
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..2;
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40912
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40928
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40945
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..2;
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40964
// Sweeps k over 1, 10, ..., 37 at m = 2, n = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40979
// Sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..2 with ks(3);
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40999
// Sweeps n over 5..7 and k over 1, 10, ..., 37 at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41016
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41033
// Sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..2 with cm_stride(7);
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41053
// Sweeps k over 1, 10, ..., 37 at m = 2, n = 4 with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41069
// Sweeps k over 1, 10, ..., 37 and zero_index over 0..1 at m = 2, n = 4,
// with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41088
// Single run at m = 2, n = 4, k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41101
// Single run at m = 2, n = 4, k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41114
// Single run at m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41127 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41128
41129
41130 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 2, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41142
// Single run at m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41155
// Sweeps n over 1..4 and m over 1..2 at k = 8; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41172
// Sweeps m over 1..2 at n = 4, k = 8; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41187
// Sweeps n over 1..4 at m = 2, k = 8; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41202
// Sweeps k over 1..7 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41216
// Sweeps k over 1..7, n over 1..4 and m over 1..2; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41235
// Sweeps k over 9..15 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41249
// Sweeps k over 9..15, n over 1..4 and m over 1..2; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41268
// Sweeps k over 16, 24, ..., 80 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41282
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..2;
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41301
// Sweeps n over 5..7 and k over 1, 10, ..., 37 at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41317
// Sweeps n over 5..7 and k over 1, 10, ..., 37 at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41334
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..2;
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41353
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41369
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41386
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..2;
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41405
// Sweeps k over 1, 10, ..., 37 at m = 2, n = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41420
// Sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..2 with ks(3);
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41440
// Sweeps n over 5..7 and k over 1, 10, ..., 37 at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41457
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41474
// Sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..2 with cm_stride(7);
// each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41494
// Sweeps k over 1, 10, ..., 37 at m = 2, n = 4 with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41510
// Sweeps k over 1, 10, ..., 37 and zero_index over 0..1 at m = 2, n = 4,
// with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41529
// Single run at m = 2, n = 4, k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41542
// Single run at m = 2, n = 4, k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41555
// Single run at m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41568 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41569
41570
41571 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 2, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41583
// Single run at m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41596
// With k fixed at 8, runs every (n, m) pair: n in 1..4 (outer), m in 1..2 (inner), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
41613
// With n = 4 and k = 8 fixed, runs m = 1..2, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
41628
// With m = 2 and k = 8 fixed, runs n = 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(nc).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
41643
// Runs the m = 2, n = 4 case once for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
41657
// For each k in 1..7, runs every n in 1..4 and m in 1..2 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
41676
// Runs the m = 2, n = 4 case once for each k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
41690
// For each k in 9..15, runs every n in 1..4 and m in 1..2 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
41709
// Runs the m = 2, n = 4 case for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
41723
// For k = 16, 24, ..., 80, runs every n in 1..4 and m in 1..2 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
41742
// For each n in 5..7 (above nr = 4), runs m = 2 with k = 1, 10, ..., 37 (step 9).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
41758
// For each n in 5..7, runs m = 2 with k = 1, 10, ..., 37 and cn_stride(7) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
41775
// For each n in 5..7 and k = 1, 10, ..., 37, runs m = 1..2 (innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
41794
// For n = 8 and 12 (multiples of nr = 4), runs m = 2 with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
41810
// For n = 8 and 12, runs m = 2 with k = 1, 10, ..., 37 and cn_stride(7) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
41827
// For n = 8 and 12 and k = 1, 10, ..., 37, runs m = 1..2 (innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
41846
// Runs m = 2, n = 4 with ks(3) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
41861
// With ks(3), for k = 1, 10, ..., 37 runs every n in 1..4 and m in 1..2 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
41881
// For each n in 5..7, runs m = 2 with ks(3) and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
41898
// For n = 8 and 12, runs m = 2 with ks(3) and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
41915
// With cm_stride(7), for k = 1, 10, ..., 37 runs every n in 1..4 and m in 1..2 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
41935
// Runs m = 2, n = 4 with ks(3) and a_offset(83) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).ks(3).a_offset(83);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
41951
// Same setup as ks(3) + a_offset(83), additionally passing zero_index = 0..1 for each k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(kc).ks(3).a_offset(83).zero_index(zi);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
41970
// One run with m == mr (2), n == nr (4), k = 8 and qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
41983
// One run with m == mr (2), n == nr (4), k = 8 and qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
41996
// One run with m == mr (2), n == nr (4), k = 8 and cm_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
42009 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42010
42011
42012 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// One run with m == mr (3), n == nr (4) and k = 8; no other tester option is set.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
42024
// One run with m == mr (3), n == nr (4), k = 8 and cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
42037
// With k fixed at 8, runs every (n, m) pair: n in 1..4 (outer), m in 1..3 (inner), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42054
// With n = 4 and k = 8 fixed, runs m = 1..3, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42069
// With m = 3 and k = 8 fixed, runs n = 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(nc).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42084
// Runs the m = 3, n = 4 case once for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42098
// For each k in 1..7, runs every n in 1..4 and m in 1..3 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
42117
// Runs the m = 3, n = 4 case once for each k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42131
// For each k in 9..15, runs every n in 1..4 and m in 1..3 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
42150
// Runs the m = 3, n = 4 case for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42164
// For k = 16, 24, ..., 80, runs every n in 1..4 and m in 1..3 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
42183
// For each n in 5..7 (above nr = 4), runs m = 3 with k = 1, 10, ..., 37 (step 9).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42199
// For each n in 5..7, runs m = 3 with k = 1, 10, ..., 37 and cn_stride(7) set.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42216
// For each n in 5..7 and k = 1, 10, ..., 37, runs m = 1..3 (innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
42235
// For n = 8 and 12 (multiples of nr = 4), runs m = 3 with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42251
// For n = 8 and 12, runs m = 3 with k = 1, 10, ..., 37 and cn_stride(7) set.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42268
// For n = 8 and 12 and k = 1, 10, ..., 37, runs m = 1..3 (innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
42287
// Runs m = 3, n = 4 with ks(3) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42302
// With ks(3), for k = 1, 10, ..., 37 runs every n in 1..4 and m in 1..3 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
42322
// For each n in 5..7, runs m = 3 with ks(3) and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42339
// For n = 8 and 12, runs m = 3 with ks(3) and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42356
// With cm_stride(7), for k = 1, 10, ..., 37 runs every n in 1..4 and m in 1..3 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
42376
// Runs m = 3, n = 4 with ks(3) and a_offset(127) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc).ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42392
// Same setup as ks(3) + a_offset(127), additionally passing zero_index = 0..2 for each k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(4).k(kc).ks(3).a_offset(127).zero_index(zi);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42411
// One run with m == mr (3), n == nr (4), k = 8 and qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
42424
// One run with m == mr (3), n == nr (4), k = 8 and qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
42437
// One run with m == mr (3), n == nr (4), k = 8 and cm_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
42450 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42451
42452
42453 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// One run with m == mr (3), n == nr (4) and k = 8 (kr = 8, sr = 1); no other tester option is set.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
42465
// One run with m == mr (3), n == nr (4), k = 8 and cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
42478
// With k fixed at 8, runs every (n, m) pair: n in 1..4 (outer), m in 1..3 (inner), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
42495
// With n = 4 and k = 8 fixed, runs m = 1..3, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42510
// With m = 3 and k = 8 fixed, runs n = 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(nc).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42525
// Runs the m = 3, n = 4 case once for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
42539
// For each k in 1..7, runs every n in 1..4 and m in 1..3 (m innermost), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
42558
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // Sweep k over 9..15 with m = 3 and n = 4.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42572
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // Every (k, n, m) combination with k in 9..15, n in 1..4, m in 1..3;
  // iterations set to 1.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42591
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // Sweep k over 16, 24, ..., 80 with m = 3 and n = 4.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42605
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // Every (k, n, m) combination with k in 16, 24, ..., 80, n in 1..4,
  // m in 1..3; iterations set to 1.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42624
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // n in 5..7 crossed with k in 1, 10, 19, 28, 37; m = 3.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42640
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // n in 5..7 crossed with k in 1, 10, 19, 28, 37; m = 3, cn_stride = 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42657
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n in 5..7, k in 1, 10, 19, 28, 37, m in 1..3; iterations set to 1.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42676
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n in 8, 12 crossed with k in 1, 10, 19, 28, 37; m = 3.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42692
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // n in 8, 12 crossed with k in 1, 10, 19, 28, 37; m = 3, cn_stride = 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42709
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n in 8, 12, k in 1, 10, 19, 28, 37, m in 1..3; iterations set to 1.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42728
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // k in 1, 10, 19, 28, 37 with m = 3, n = 4 and ks = 3.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42743
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // k in 1, 10, 19, 28, 37, n in 1..4, m in 1..3; ks = 3, iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42763
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // n in 5..7 crossed with k in 1, 10, 19, 28, 37; m = 3, ks = 3.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42780
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // n in 8, 12 crossed with k in 1, 10, 19, 28, 37; m = 3, ks = 3.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42797
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // k in 1, 10, 19, 28, 37, n in 1..4, m in 1..3; cm_stride = 7,
  // iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42817
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  // k in 1, 10, 19, 28, 37 with m = 3, n = 4, ks = 3 and a_offset = 127.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42833
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  // k in 1, 10, 19, 28, 37 crossed with zero_index in 0..2;
  // m = 3, n = 4, ks = 3, a_offset = 127.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42852
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single m = 3, n = 4, k = 8 configuration with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42865
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single m = 3, n = 4, k = 8 configuration with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42878
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single m = 3, n = 4, k = 8 configuration with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42891 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42892
42893
42894 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  // Single m = 4, n = 4, k = 8 configuration.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42906
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  // Single m = 4, n = 4, k = 8 configuration with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42919
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // n in 1..4 crossed with m in 1..4 at k = 8; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42936
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // Sweep m over 1..4 with n = 4 and k = 8; iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42951
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // Sweep n over 1..4 with m = 4 and k = 8; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42966
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // Sweep k over 1..7 with m = 4 and n = 4.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42980
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // Every (k, n, m) combination with k in 1..7, n in 1..4, m in 1..4;
  // iterations set to 1.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42999
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // Sweep k over 9..15 with m = 4 and n = 4.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43013
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // Every (k, n, m) combination with k in 9..15, n in 1..4, m in 1..4;
  // iterations set to 1.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43032
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // Sweep k over 16, 24, ..., 80 with m = 4 and n = 4.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43046
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // Every (k, n, m) combination with k in 16, 24, ..., 80, n in 1..4,
  // m in 1..4; iterations set to 1.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43065
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // n in 5..7 crossed with k in 1, 10, 19, 28, 37; m = 4.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43081
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // n in 5..7 crossed with k in 1, 10, 19, 28, 37; m = 4, cn_stride = 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43098
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n in 5..7, k in 1, 10, 19, 28, 37, m in 1..4; iterations set to 1.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43117
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n in 8, 12 crossed with k in 1, 10, 19, 28, 37; m = 4.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43133
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // n in 8, 12 crossed with k in 1, 10, 19, 28, 37; m = 4, cn_stride = 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43150
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n in 8, 12, k in 1, 10, 19, 28, 37, m in 1..4; iterations set to 1.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43169
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // k in 1, 10, 19, 28, 37 with m = 4, n = 4 and ks = 3.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43184
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // k in 1, 10, 19, 28, 37, n in 1..4, m in 1..4; ks = 3, iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43204
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // n in 5..7 crossed with k in 1, 10, 19, 28, 37; m = 4, ks = 3.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43221
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // n in 8, 12 crossed with k in 1, 10, 19, 28, 37; m = 4, ks = 3.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43238
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // k in 1, 10, 19, 28, 37, n in 1..4, m in 1..4; cm_stride = 7,
  // iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43258
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  // k in 1, 10, 19, 28, 37 with m = 4, n = 4, ks = 3 and a_offset = 163.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43274
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  // k in 1, 10, 19, 28, 37 crossed with zero_index in 0..3;
  // m = 4, n = 4, ks = 3, a_offset = 163.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43293
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single m = 4, n = 4, k = 8 configuration with qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
43306
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single m = 4, n = 4, k = 8 configuration with qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
43319
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single m = 4, n = 4, k = 8 configuration with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
43332 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43333
43334
43335 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single m = 4, n = 4, k = 8 configuration.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
43347
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single m = 4, n = 4, k = 8 configuration with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
43360
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // n in 1..4 crossed with m in 1..4 at k = 8; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43377
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // Sweep m over 1..4 with n = 4 and k = 8; iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43392
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // Sweep n over 1..4 with m = 4 and k = 8; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43407
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Sweep k over 1..7 with m = 4 and n = 4.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43421
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // Every (k, n, m) combination with k in 1..7, n in 1..4, m in 1..4;
  // iterations set to 1.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43440
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Sweep k over 9..15 with m = 4 and n = 4.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43454
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // Every (k, n, m) combination with k in 9..15, n in 1..4, m in 1..4;
  // iterations set to 1.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43473
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Sweep k over 16, 24, ..., 80 with m = 4 and n = 4.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43487
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // Every (k, n, m) combination with k in 16, 24, ..., 80, n in 1..4,
  // m in 1..4; iterations set to 1.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43506
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n in 5..7 crossed with k in 1, 10, 19, 28, 37; m = 4.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43522
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n in 5..7 crossed with k in 1, 10, 19, 28, 37; m = 4, cn_stride = 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43539
// For n in {5, 6, 7} and k = 1, 10, ..., 37, runs every m in [1, 4]
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43558
// Runs m = 4 with n in {8, 12}, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43574
// Same sweep as n_div_4 (n in {8, 12}; k = 1, 10, ..., 37) but with
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43591
// For n in {8, 12} and k = 1, 10, ..., 37, runs every m in [1, 4]
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43610
// Runs the full m = 4, n = 4 case with ks(3), sweeping k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43625
// With ks(3) and iterations(1), sweeps k over 1, 10, ..., 37 and every
// (m, n) pair with m and n in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43645
// With ks(3), runs m = 4 for n in {5, 6, 7} and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43662
// With ks(3), runs m = 4 for n in {8, 12} and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43679
// With cm_stride(7) and iterations(1), sweeps k over 1, 10, ..., 37 and
// every (m, n) pair with m and n in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43699
// Runs m = 4, n = 4 with ks(3) and a_offset(163), sweeping k over
// 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43715
// Runs m = 4, n = 4 with ks(3) and a_offset(163); for each k in
// 1, 10, ..., 37 the zero_index is stepped through 0, 1, 2, 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43734
// Single m = 4, n = 4, k = 8 run with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43747
// Single m = 4, n = 4, k = 8 run with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43760
// Single m = 4, n = 4, k = 8 run with cm_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43773 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43774
43775
43776 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the full m = 4, n = 4 case with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43788
// Single m = 4, n = 4, k = 8 run with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43801
// With k = 8 and iterations(1), runs every (m, n) pair with m and n
// in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43818
// With n = 4, k = 8 and iterations(1), runs every m in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43833
// With m = 4, k = 8 and iterations(1), runs every n in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43848
// Runs the full m = 4, n = 4 case for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43862
// For every k in [1, 7], runs every (m, n) pair with m and n in [1, 4],
// each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43881
// Runs the full m = 4, n = 4 case for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43895
// For every k in [9, 15], runs every (m, n) pair with m and n in [1, 4],
// each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43914
// Runs the full m = 4, n = 4 case with k swept over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43928
// Sweeps k over 16, 24, ..., 80 and, for each k, every (m, n) pair with
// m and n in [1, 4]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43947
// Runs m = 4 with n in {5, 6, 7}, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43963
// Same sweep as n_gt_4 (n in {5, 6, 7}; k = 1, 10, ..., 37) but with
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43980
// For n in {5, 6, 7} and k = 1, 10, ..., 37, runs every m in [1, 4]
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43999
// Runs m = 4 with n in {8, 12}, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44015
// Same sweep as n_div_4 (n in {8, 12}; k = 1, 10, ..., 37) but with
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44032
// For n in {8, 12} and k = 1, 10, ..., 37, runs every m in [1, 4]
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44051
// Runs the full m = 4, n = 4 case with ks(3), sweeping k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44066
// With ks(3) and iterations(1), sweeps k over 1, 10, ..., 37 and every
// (m, n) pair with m and n in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44086
// With ks(3), runs m = 4 for n in {5, 6, 7} and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44103
// With ks(3), runs m = 4 for n in {8, 12} and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44120
// With cm_stride(7) and iterations(1), sweeps k over 1, 10, ..., 37 and
// every (m, n) pair with m and n in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44140
// Runs m = 4, n = 4 with ks(3) and a_offset(163), sweeping k over
// 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44156
// Runs m = 4, n = 4 with ks(3) and a_offset(163); for each k in
// 1, 10, ..., 37 the zero_index is stepped through 0, 1, 2, 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44175
// Single m = 4, n = 4, k = 8 run with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44188
// Single m = 4, n = 4, k = 8 run with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44201
// Single m = 4, n = 4, k = 8 run with cm_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44214 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44215
44216
44217 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the full m = 4, n = 4 case with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44229
// Single m = 4, n = 4, k = 8 run with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44242
// With k = 8 and iterations(1), runs every (m, n) pair with m and n
// in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44259
// With n = 4, k = 8 and iterations(1), runs every m in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44274
// With m = 4, k = 8 and iterations(1), runs every n in [1, 4].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44289
// Runs the full m = 4, n = 4 case for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44303
// For every k in [1, 7], runs every (m, n) pair with m and n in [1, 4],
// each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44322
// Runs the full m = 4, n = 4 case for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44336
// For every k in [9, 15], runs every (m, n) pair with m and n in [1, 4],
// each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44355
// Runs the full m = 4, n = 4 case with k swept over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44369
// Sweeps k over 16, 24, ..., 80 and, for each k, every (m, n) pair with
// m and n in [1, 4]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44388
// Runs m = 4 with n in {5, 6, 7}, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44404
// Same sweep as n_gt_4 (n in {5, 6, 7}; k = 1, 10, ..., 37) but with
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44421
// For n in {5, 6, 7} and k = 1, 10, ..., 37, runs every m in [1, 4]
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44440
// Runs m = 4 with n in {8, 12}, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44456
// Same sweep as n_div_4 (n in {8, 12}; k = 1, 10, ..., 37) but with
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44473
// For n in {8, 12} and k = 1, 10, ..., 37, runs every m in [1, 4]
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44492
// Runs the full m = 4, n = 4 case with ks(3), sweeping k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44507
// ks(3) with k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44527
// ks(3) with n in 5..7 and k in {1, 10, 19, 28, 37}; m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44544
// ks(3) with n in {8, 12} and k in {1, 10, 19, 28, 37}; m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44561
// cm_stride(7) with k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44581
// Full 4x4 tile with ks(3) and a_offset(163); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44597
// Full 4x4 tile with ks(3), a_offset(163) and zero_index in 0..3; k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; zero_idx++) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44616
// Full 4x4 tile, k = 8, with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44629
// Full 4x4 tile, k = 8, with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44642
// Full 4x4 tile, k = 8, with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44655 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44656
44657
44658 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: full 4x4 tile with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44670
// Full 4x4 tile, k = 8, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44683
// k = 8 with n in 1..4 and m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
    for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44700
// k = 8 and n = 4 with m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44715
// k = 8 and m = 4 with n in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44730
// Full 4x4 tile with k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; k_dim++) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44744
// k in 1..7, n in 1..4, m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; k_dim++) {
    for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44763
// Full 4x4 tile with k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; k_dim++) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44777
// k in 9..15, n in 1..4, m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; k_dim++) {
    for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44796
// Full 4x4 tile with k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44810
// k = 16, 24, ..., 80 with n in 1..4 and m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44829
// n in 5..7 and k in {1, 10, 19, 28, 37}; m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44845
// cn_stride(7) with n in 5..7 and k in {1, 10, 19, 28, 37}; m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44862
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44881
// n in {8, 12} and k in {1, 10, 19, 28, 37}; m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44897
// cn_stride(7) with n in {8, 12} and k in {1, 10, 19, 28, 37}; m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44914
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44933
// Full 4x4 tile with ks(3); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
44948
// ks(3) with k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44968
// ks(3) with n in 5..7 and k in {1, 10, 19, 28, 37}; m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44985
// ks(3) with n in {8, 12} and k in {1, 10, 19, 28, 37}; m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45002
// cm_stride(7) with k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 4; m_dim++) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
45022
// Full 4x4 tile with ks(3) and a_offset(163); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
45038
// Full 4x4 tile with ks(3), a_offset(163) and zero_index in 0..3; k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; zero_idx++) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45057
// Full 4x4 tile, k = 8, with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
45070
// Full 4x4 tile, k = 8, with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
45083
// Full 4x4 tile, k = 8, with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
45096 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45097
45098
45099 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: full 3x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45111
// Full 3x2 tile, k = 1, with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45124
// k = 1 with n in 1..2 and m in 1..3; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; n_dim++) {
    for (uint32_t m_dim = 1; m_dim <= 3; m_dim++) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45141
// k = 1 and n = 2 with m in 1..3; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 3; m_dim++) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_dim).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45156
// k = 1 and m = 3 with n in 1..2; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; n_dim++) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_dim).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45171
// Full 3x2 tile with k in 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; k_dim++) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45185
// k in 2..9, n in 1..2, m in 1..3; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; k_dim++) {
    for (uint32_t n_dim = 1; n_dim <= 2; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 3; m_dim++) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
45204
// n = 3 (the loop's only value) and k in {1, 3, 5}; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45220
// cn_stride(5) with n = 3 (the loop's only value) and k in {1, 3, 5}; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45237
// n = 3 (the loop's only value), k in {1, 3, 5}, m in 1..3; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; m_dim++) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
45256
// n in {4, 6} and k in {1, 3, 5}; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45272
// cn_stride(5) with n in {4, 6} and k in {1, 3, 5}; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45289
// n in {4, 6}, k in {1, 3, 5}, m in 1..3; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; m_dim++) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
45308
// Full 3x2 tile with ks(3); k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_dim)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45323
// ks(3) with k in {1, 3, 5}, n in 1..2, m in 1..3; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 3; m_dim++) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
45343
// ks(3) with n = 3 (the loop's only value) and k in {1, 3, 5}; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_dim = 3; n_dim < 4; n_dim++) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45360
// ks(3) with n in {4, 6} and k in {1, 3, 5}; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45377
// cm_stride(5) with k in {1, 3, 5}, n in 1..2, m in 1..3; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; n_dim++) {
      for (uint32_t m_dim = 1; m_dim <= 3; m_dim++) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
45397
// Full 3x2 tile with ks(3) and a_offset(17); k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_dim)
        .ks(3)
        .a_offset(17)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45413
// Full 3x2 tile with ks(3), a_offset(17) and zero_index in 0..2; k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(2).k(k_dim)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45432
// Full 3x2 tile, k = 1, with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45445
// Full 3x2 tile, k = 1, with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45458
// Full 3x2 tile, k = 1, with cm_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45471 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45472
45473
45474 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: full 3x4 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45486
// Full 3x4 tile, k = 1, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45499
// k = 1 with n in 1..4 and m in 1..3; one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; n_dim++) {
    for (uint32_t m_dim = 1; m_dim <= 3; m_dim++) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45516
// k = 1, n = 4, for every m in [1, 3], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(cur_m).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45531
// k = 1, m = 3, for every n in [1, 4], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(cur_n).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45546
// 3x4 configuration (m = mr, n = nr) for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45560
// Every k in [2, 9], n in [1, 4] and m in [1, 3], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45579
// m = 3 with n in [5, 7] (above nr = 4) and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45595
// m = 3 with n in [5, 7], k in {1, 3, 5} and cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45612
// n in [5, 7], k in {1, 3, 5} and m in [1, 3], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45631
// m = 3 with n in {8, 12} (multiples of nr = 4) and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45647
// m = 3 with n in {8, 12}, k in {1, 3, 5} and cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45664
// n in {8, 12}, k in {1, 3, 5} and m in [1, 3], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45683
// 3x4 configuration (m = mr, n = nr) with ks(3) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(cur_k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45698
// ks(3) for k in {1, 3, 5}, n in [1, 4] and m in [1, 3], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45718
// m = 3 and ks(3) with n in [5, 7] and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45735
// m = 3 and ks(3) with n in {8, 12} and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45752
// cm_stride(7) for k in {1, 3, 5}, n in [1, 4] and m in [1, 3], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45772
// 3x4 configuration with ks(3) and a_offset(17) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(cur_k)
      .ks(3)
      .a_offset(17)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45788
// 3x4 configuration with ks(3) and a_offset(17); zero_index takes each value in [0, 2] for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(cur_k)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45807
// 3x4 configuration (m = mr, n = nr) with k = 1 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45820
// 3x4 configuration (m = mr, n = nr) with k = 1 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45833
// 3x4 configuration (m = mr, n = nr) with k = 1 and cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45846 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45847
45848
// 1x2 configuration (m = mr, n = nr) with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45860
// 1x2 configuration (m = mr, n = nr) with k = 1 and cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45873
// k = 1 for every n in [1, 2] and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(1)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45890
// k = 1, n = 2, for every m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(cur_m).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45905
// k = 1, m = 1, for every n in [1, 2], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(cur_n).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45920
// 1x2 configuration (m = mr, n = nr) for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45934
// Every k in [2, 9], n in [1, 2] and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45953
// m = 1 with n = 3 (the only value in [3, 4), above nr = 2) and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45969
// m = 1 with n = 3 (the only value in [3, 4)), k in {1, 3, 5} and cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45986
// n = 3 (the only value in [3, 4)), k in {1, 3, 5} and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46005
// m = 1 with n in {4, 6} (multiples of nr = 2) and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46021
// m = 1 with n in {4, 6}, k in {1, 3, 5} and cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46038
// n in {4, 6}, k in {1, 3, 5} and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46057
// 1x2 configuration (m = mr, n = nr) with ks(3) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(cur_k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46072
// ks(3) for k in {1, 3, 5}, n in [1, 2] and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46092
// m = 1 and ks(3) with n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46109
// m = 1 and ks(3) with n in {4, 6} and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46126
// cm_stride(5) for k in {1, 3, 5}, n in [1, 2] and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46146
// 1x2 configuration with ks(3) and a_offset(7) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(cur_k)
      .ks(3)
      .a_offset(7)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46162
// 1x2 configuration with ks(3) and a_offset(7); zero_index takes each value in [0, 1) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(cur_k)
        .ks(3)
        .a_offset(7)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46181
// 1x2 configuration (m = mr, n = nr) with k = 1 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46194
// 1x2 configuration (m = mr, n = nr) with k = 1 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46207
// 1x2 configuration (m = mr, n = nr) with k = 1 and cm_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46220
46221
// 1x2 configuration (m = mr, n = nr) with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
46233
// 1x2 configuration (m = mr, n = nr) with k = 1 and cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
46246
// k = 1 for every n in [1, 2] and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(1)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46263
// k = 1, n = 2, for every m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(cur_m).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
46278
// k = 1, m = 1, for every n in [1, 2], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(cur_n).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
46293
// 1x2 configuration (m = mr, n = nr) for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
46307
// Every k in [2, 9], n in [1, 2] and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46326
// m = 1 with n = 3 (the only value in [3, 4), above nr = 2) and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46342
// m = 1 with n = 3 (the only value in [3, 4)), k in {1, 3, 5} and cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46359
// n = 3 (the only value in [3, 4)), k in {1, 3, 5} and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46378
// m = 1 with n in {4, 6} (multiples of nr = 2) and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46394
// m = 1 with n in {4, 6}, k in {1, 3, 5} and cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46411
// n in {4, 6}, k in {1, 3, 5} and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46430
// 1x2 configuration (m = mr, n = nr) with ks(3) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(cur_k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
46445
// ks(3) for k in {1, 3, 5}, n in [1, 2] and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46465
// m = 1 and ks(3) with n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46482
// m = 1 and ks(3) with n in {4, 6} and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46499
// cm_stride(5) for k in {1, 3, 5}, n in [1, 2] and m in [1, 1], with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46519
// 1x2 configuration with ks(3) and a_offset(7) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(cur_k)
      .ks(3)
      .a_offset(7)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
46535
// 1x2 configuration with ks(3) and a_offset(7); zero_index takes each value in [0, 1) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(cur_k)
        .ks(3)
        .a_offset(7)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46554
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
  // Full 1x2 tile, k=1, with qmin set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
46567
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
  // Full 1x2 tile, k=1, with qmax set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
46580
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
  // Full 1x2 tile, k=1, with cm_stride=5.
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
46593
46594
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
  // Full 1x4 tile (m=1, n=4) with k=1.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
46606
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
  // Full 1x4 tile, k=1, with cn_stride=7.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
46619
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  // Every (m, n) up to the 1x4 tile with k=1, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46636
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // m from 1 to mr=1 with n=4 and k=1, one iteration each.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
46651
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // n from 1 to nr=4 with m=1 and k=1, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
46666
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
  // Full 1x4 tile for every k from 2 to 9.
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
46680
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Every (m, n) up to the 1x4 tile for k from 2 to 9, one iteration each.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
46699
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
  // n from 5 to 7 (above nr=4), k in {1, 3, 5}.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46715
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  // n from 5 to 7 (above nr=4), k in {1, 3, 5}, with cn_stride=7.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46732
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  // n from 5 to 7, k in {1, 3, 5}, m from 1 to mr=1; one iteration each.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
46751
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
  // n in {8, 12} (multiples of nr=4), k in {1, 3, 5}.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46767
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  // n in {8, 12} (multiples of nr=4), k in {1, 3, 5}, with cn_stride=7.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46784
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 3, 5}, m from 1 to mr=1; one iteration each.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
46803
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel) {
  // Full 1x4 tile for k in {1, 3, 5}, with ks=3.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
46818
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel_subtile) {
  // Every (m, n) up to the 1x4 tile for k in {1, 3, 5}, with ks=3 and one
  // iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
46838
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  // n from 5 to 7 (above nr=4), k in {1, 3, 5}, with ks=3.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46855
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  // n in {8, 12} (multiples of nr=4), k in {1, 3, 5}, with ks=3.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46872
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
  // Every (m, n) up to the 1x4 tile for k in {1, 3, 5}, with cm_stride=7
  // and one iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
46892
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, a_offset) {
  // Full 1x4 tile for k in {1, 3, 5}, with ks=3 and a_offset=7.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
46908
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, zero) {
  // Full 1x4 tile for k in {1, 3, 5}, with ks=3 and a_offset=7; zero_index
  // takes each value below mr=1.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(7)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46927
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
  // Full 1x4 tile, k=1, with qmin set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
46940
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
  // Full 1x4 tile, k=1, with qmax set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
46953
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
  // Full 1x4 tile, k=1, with cm_stride=7.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
46966
46967
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
  // Full 1x4 tile (m=1, n=4) with k=1.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
46979
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
  // Full 1x4 tile, k=1, with cn_stride=7.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
46992
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
  // Every (m, n) up to the 1x4 tile with k=1, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47009
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  // m from 1 to mr=1 with n=4 and k=1, one iteration each.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47024
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  // n from 1 to nr=4 with m=1 and k=1, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47039
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
  // Full 1x4 tile for every k from 2 to 9.
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47053
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
  // Every (m, n) up to the 1x4 tile for k from 2 to 9, one iteration each.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47072
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
  // n from 5 to 7 (above nr=4), k in {1, 3, 5}.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47088
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  // n from 5 to 7 (above nr=4), k in {1, 3, 5}, with cn_stride=7.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47105
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
  // n from 5 to 7, k in {1, 3, 5}, m from 1 to mr=1; one iteration each.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47124
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
  // n in {8, 12} (multiples of nr=4), k in {1, 3, 5}.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47140
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  // n in {8, 12} (multiples of nr=4), k in {1, 3, 5}, with cn_stride=7.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47157
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 3, 5}, m from 1 to mr=1; one iteration each.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47176
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
  // Full 1x4 tile for k in {1, 3, 5}, with ks=3.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47191
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
  // Every (m, n) up to the 1x4 tile for k in {1, 3, 5}, with ks=3 and one
  // iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47211
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  // n from 5 to 7 (above nr=4), k in {1, 3, 5}, with ks=3.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47228
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  // n in {8, 12} (multiples of nr=4), k in {1, 3, 5}, with ks=3.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47245
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
  // Every (m, n) up to the 1x4 tile for k in {1, 3, 5}, with cm_stride=7
  // and one iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47265
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
  // Full 1x4 tile for k in {1, 3, 5}, with ks=3 and a_offset=7.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47281
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
  // Full 1x4 tile for k in {1, 3, 5}, with ks=3 and a_offset=7; zero_index
  // takes each value below mr=1.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(7)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47300
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
  // Full 1x4 tile, k=1, with qmin set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47313
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
  // Full 1x4 tile, k=1, with qmax set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47326
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
  // Full 1x4 tile, k=1, with cm_stride=7.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47339
47340
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
  // Full 2x2 tile (m=2, n=2) with k=1.
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47352
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
  // Full 2x2 tile, k=1, with cn_stride=5.
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47365
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  // Every (m, n) up to the 2x2 tile with k=1, one iteration each.
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47382
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // m from 1 to mr=2 with n=2 and k=1, one iteration each.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47397
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // n from 1 to nr=2 with m=2 and k=1, one iteration each.
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47412
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
  // Full 2x2 tile for every k from 2 to 9.
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47426
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Every (m, n) up to the 2x2 tile for k from 2 to 9, one iteration each.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47445
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
  // n = 3 (above nr=2), k in {1, 3, 5}.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47461
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  // n = 3 (above nr=2), k in {1, 3, 5}, with cn_stride=5.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47478
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  // n = 3, k in {1, 3, 5}, m from 1 to mr=2; one iteration each.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47497
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
  // n in {4, 6} (multiples of nr=2), k in {1, 3, 5}.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47513
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  // n in {4, 6} (multiples of nr=2), k in {1, 3, 5}, with cn_stride=5.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47530
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5}, m from 1 to mr=2; one iteration each.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47549
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel) {
  // Full 2x2 tile for k in {1, 3, 5}, with ks=3.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47564
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel_subtile) {
  // Every (m, n) up to the 2x2 tile for k in {1, 3, 5}, with ks=3 and one
  // iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47584
// 2x2 scalar_fmagic kernel: n = 3 (loop over 3 <= n < 4), k in {1, 3, 5}, m = 2, ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47601
// 2x2 scalar_fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m = 2, ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47618
// 2x2 scalar_fmagic kernel: cm_stride(5) with k in {1, 3, 5}, n in 1..2, m in 1..2, iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47638
// 2x2 scalar_fmagic kernel: full 2x2 tile with ks(3) and a_offset(13), k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .ks(3)
        .a_offset(13)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47654
// 2x2 scalar_fmagic kernel: ks(3), a_offset(13), zero_index in {0, 1}, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(2).k(kc)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47673
// 2x2 scalar_fmagic kernel: single 2x2 tile, k = 1, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47686
// 2x2 scalar_fmagic kernel: single 2x2 tile, k = 1, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47699
// 2x2 scalar_fmagic kernel: single 2x2 tile, k = 1, with cm_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47712
47713
// 2x2 scalar_lrintf kernel: single 2x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47725
// 2x2 scalar_lrintf kernel: single 2x2 tile, k = 1, with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47738
// 2x2 scalar_lrintf kernel: k = 1 with n in 1..2 and m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47755
// 2x2 scalar_lrintf kernel: k = 1, n = 2, with m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47770
// 2x2 scalar_lrintf kernel: k = 1, m = 2, with n in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47785
// 2x2 scalar_lrintf kernel: full 2x2 tile with k in 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47799
// 2x2 scalar_lrintf kernel: k in 2..9, n in 1..2, m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47818
// 2x2 scalar_lrintf kernel: n = 3 (loop over 3 <= n < 4), k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47834
// 2x2 scalar_lrintf kernel: n = 3 (loop over 3 <= n < 4), k in {1, 3, 5}, m = 2, cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47851
// 2x2 scalar_lrintf kernel: n = 3 (loop over 3 <= n < 4), k in {1, 3, 5}, m in 1..2, iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47870
// 2x2 scalar_lrintf kernel: n in {4, 6} and k in {1, 3, 5} at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47886
// 2x2 scalar_lrintf kernel: n in {4, 6} and k in {1, 3, 5} at m = 2, with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47903
// 2x2 scalar_lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47922
// 2x2 scalar_lrintf kernel: full 2x2 tile with ks(3), k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47937
// 2x2 scalar_lrintf kernel: ks(3) with k in {1, 3, 5}, n in 1..2, m in 1..2, iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47957
// 2x2 scalar_lrintf kernel: n = 3 (loop over 3 <= n < 4), k in {1, 3, 5}, m = 2, ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47974
// 2x2 scalar_lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m = 2, ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47991
// 2x2 scalar_lrintf kernel: cm_stride(5) with k in {1, 3, 5}, n in 1..2, m in 1..2, iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
48011
// 2x2 scalar_lrintf kernel: full 2x2 tile with ks(3) and a_offset(13), k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .ks(3)
        .a_offset(13)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
48027
// 2x2 scalar_lrintf kernel: ks(3), a_offset(13), zero_index in {0, 1}, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(2).k(kc)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48046
// 2x2 scalar_lrintf kernel: single 2x2 tile, k = 1, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
48059
// 2x2 scalar_lrintf kernel: single 2x2 tile, k = 1, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
48072
// 2x2 scalar_lrintf kernel: single 2x2 tile, k = 1, with cm_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
48085
48086
// 2x4 scalar_fmagic kernel: single 2x4 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48098
// 2x4 scalar_fmagic kernel: single 2x4 tile, k = 1, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48111
// 2x4 scalar_fmagic kernel: k = 1 with n in 1..4 and m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48128
// 2x4 scalar_fmagic kernel: k = 1, n = 4, with m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
48143
// 2x4 scalar_fmagic kernel: k = 1, m = 2, with n in 1..4, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
48158
// 2x4 scalar_fmagic kernel: full 2x4 tile with k in 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
48172
// 2x4 scalar_fmagic kernel: k in 2..9, n in 1..4, m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
48191
// 2x4 scalar_fmagic kernel: n in 5..7 and k in {1, 3, 5} at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48207
// 2x4 scalar_fmagic kernel: n in 5..7 and k in {1, 3, 5} at m = 2, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48224
// 2x4 scalar_fmagic kernel: n in 5..7, k in {1, 3, 5}, m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
48243
// 2x4 scalar_fmagic kernel: n in {8, 12} and k in {1, 3, 5} at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48259
// 2x4 scalar_fmagic kernel: n in {8, 12} and k in {1, 3, 5} at m = 2, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48276
// 2x4 scalar_fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
48295
// 2x4 scalar_fmagic kernel: full 2x4 tile with ks(3), k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
48310
// 2x4 scalar_fmagic kernel: ks(3) with k in {1, 3, 5}, n in 1..4, m in 1..2, iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
48330
// 2x4 scalar_fmagic kernel: n in 5..7, k in {1, 3, 5}, m = 2, ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48347
// 2x4 scalar_fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m = 2, ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48364
// 2x4 scalar_fmagic kernel: cm_stride(7) with k in {1, 3, 5}, n in 1..4, m in 1..2, iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
48384
// 2x4 scalar_fmagic kernel: full 2x4 tile with ks(3) and a_offset(13), k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(13)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
48400
// 2x4 scalar_fmagic kernel: ks(3), a_offset(13), zero_index in {0, 1}, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48419
// 2x4 scalar_fmagic kernel: single 2x4 tile, k = 1, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48432
// 2x4 scalar_fmagic kernel: single 2x4 tile, k = 1, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48445
// 2x4 scalar_fmagic kernel: single 2x4 tile, k = 1, with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48458
48459
// 2x4 scalar_lrintf kernel: single 2x4 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
48471
// 2x4 scalar_lrintf kernel: single 2x4 tile, k = 1, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
48484
// 2x4 scalar_lrintf kernel: k = 1 with n in 1..4 and m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48501
// 2x4 scalar_lrintf kernel: k = 1, n = 4, with m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
48516
// 2x4 scalar_lrintf kernel: k = 1, m = 2, with n in 1..4, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
48531
// 2x4 scalar_lrintf kernel: full 2x4 tile with k in 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
48545
// 2x4 scalar_lrintf kernel: k in 2..9, n in 1..4, m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
48564
// 2x4 scalar_lrintf kernel: n in 5..7 and k in {1, 3, 5} at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48580
// 2x4 scalar_lrintf kernel: n in 5..7 and k in {1, 3, 5} at m = 2, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48597
// 2x4 scalar_lrintf kernel: n in 5..7, k in {1, 3, 5}, m in 1..2, iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
48616
// 2x4 scalar-lrintf kernel: n in {8, 12} (multiples of nr = 4) with
// k in {1, 3, 5} and m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
48632
// 2x4 scalar-lrintf kernel: n in {8, 12} with k in {1, 3, 5}, m fixed at 2
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
48649
// 2x4 scalar-lrintf kernel: n in {8, 12}, k in {1, 3, 5} and m in [1, 2],
// running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                    xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
48668
// 2x4 scalar-lrintf kernel: full 2x4 tile with ks set to 3 and
// k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
48683
// 2x4 scalar-lrintf kernel: ks set to 3, k in {1, 3, 5}, every n in [1, 4]
// and m in [1, 2], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                    xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
48703
// 2x4 scalar-lrintf kernel: n = 5..7 with k in {1, 3, 5}, m fixed at 2
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
48720
// 2x4 scalar-lrintf kernel: n in {8, 12} with k in {1, 3, 5}, m fixed at 2
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
48737
// 2x4 scalar-lrintf kernel: cm_stride set to 7, k in {1, 3, 5}, every
// n in [1, 4] and m in [1, 2], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                    xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
48757
// 2x4 scalar-lrintf kernel: full 2x4 tile with ks set to 3, a_offset set
// to 13 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.a_offset(13);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
48773
// 2x4 scalar-lrintf kernel: full 2x4 tile with ks set to 3 and a_offset
// set to 13, sweeping k in {1, 3, 5} and zero_index over 0..1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(4).k(k_size);
      tester.ks(3);
      tester.a_offset(13);
      tester.zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
48792
// 2x4 scalar-lrintf kernel: full 2x4 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
}
48805
// 2x4 scalar-lrintf kernel: full 2x4 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
}
48818
// 2x4 scalar-lrintf kernel: full 2x4 tile, k = 1, with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
}
48831
48832
// 3x2 scalar-imagic kernel: full 3x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
48844
// 3x2 scalar-imagic kernel: full 3x2 tile, k = 1, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
48857
// 3x2 scalar-imagic kernel: k = 1 over every n in [1, 2] and m in [1, 3],
// running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
48874
// 3x2 scalar-imagic kernel: k = 1, n fixed at 2, m in [1, 3], running a
// single iteration per value of m.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(m_size).n(2).k(1);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
48889
// 3x2 scalar-imagic kernel: k = 1, m fixed at 3, n in [1, 2], running a
// single iteration per value of n.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(n_size).k(1);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
48904
// 3x2 scalar-imagic kernel: full 3x2 tile with k = 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
48918
// 3x2 scalar-imagic kernel: k = 2..9 crossed with every n in [1, 2] and
// m in [1, 3], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
48937
// 3x2 scalar-imagic kernel: n above nr = 2 (the loop covers only n = 3)
// with k in {1, 3, 5} and m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
48953
// 3x2 scalar-imagic kernel: n = 3 (single-value loop) with k in {1, 3, 5},
// m fixed at 3 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(5);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
48970
// 3x2 scalar-imagic kernel: n = 3 (single-value loop), k in {1, 3, 5} and
// m in [1, 3], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
48989
// 3x2 scalar-imagic kernel: n in {4, 6} (multiples of nr = 2) with
// k in {1, 3, 5} and m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49005
// 3x2 scalar-imagic kernel: n in {4, 6} with k in {1, 3, 5}, m fixed at 3
// and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(5);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49022
// 3x2 scalar-imagic kernel: n in {4, 6}, k in {1, 3, 5} and m in [1, 3],
// running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
49041
// 3x2 scalar-imagic kernel: full 3x2 tile with ks set to 3 and
// k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49056
// 3x2 scalar-imagic kernel: ks set to 3, k in {1, 3, 5}, every n in [1, 2]
// and m in [1, 3], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
49076
// 3x2 scalar-imagic kernel: n = 3 (single-value loop) with k in {1, 3, 5},
// m fixed at 3 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49093
// 3x2 scalar-imagic kernel: n in {4, 6} with k in {1, 3, 5}, m fixed at 3
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49110
// 3x2 scalar-imagic kernel: cm_stride set to 5, k in {1, 3, 5}, every
// n in [1, 2] and m in [1, 3], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(5);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
49130
// 3x2 scalar-imagic kernel: full 3x2 tile with ks set to 3, a_offset set
// to 17 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_size);
    tester.ks(3);
    tester.a_offset(17);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49146
// 3x2 scalar-imagic kernel: full 3x2 tile with ks set to 3 and a_offset
// set to 17, sweeping k in {1, 3, 5} and zero_index over 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(2).k(k_size);
      tester.ks(3);
      tester.a_offset(17);
      tester.zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49165
// 3x2 scalar-imagic kernel: full 3x2 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49178
// 3x2 scalar-imagic kernel: full 3x2 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49191
// 3x2 scalar-imagic kernel: full 3x2 tile, k = 1, with cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49204
49205
// 3x4 scalar-imagic kernel: full 3x4 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49217
// 3x4 scalar-imagic kernel: full 3x4 tile, k = 1, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49230
// 3x4 scalar-imagic kernel: k = 1 over every n in [1, 4] and m in [1, 3],
// running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49247
// 3x4 scalar-imagic kernel: k = 1, n fixed at 4, m in [1, 3], running a
// single iteration per value of m.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(m_size).n(4).k(1);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49262
// 3x4 scalar-imagic kernel: k = 1, m fixed at 3, n in [1, 4], running a
// single iteration per value of n.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(n_size).k(1);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49277
// 3x4 scalar-imagic kernel: full 3x4 tile with k = 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49291
// 3x4 scalar-imagic kernel: k = 2..9 crossed with every n in [1, 4] and
// m in [1, 3], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
49310
// 3x4 scalar-imagic kernel: n = 5..7 (above nr = 4) with k in {1, 3, 5}
// and m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49326
// 3x4 scalar-imagic kernel: n = 5..7 with k in {1, 3, 5}, m fixed at 3 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49343
// 3x4 scalar-imagic kernel: n = 5..7, k in {1, 3, 5} and m in [1, 3],
// running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
49362
// 3x4 scalar-imagic kernel: n in {8, 12} (multiples of nr = 4) with
// k in {1, 3, 5} and m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49378
// 3x4 scalar-imagic kernel: n in {8, 12} with k in {1, 3, 5}, m fixed at 3
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49395
// 3x4 scalar-imagic kernel: n in {8, 12}, k in {1, 3, 5} and m in [1, 3],
// running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
49414
// 3x4 scalar-imagic kernel: full 3x4 tile with ks set to 3 and
// k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49429
// 3x4 scalar-imagic kernel: ks set to 3, k in {1, 3, 5}, every n in [1, 4]
// and m in [1, 3], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
49449
// 3x4 scalar-imagic kernel: n = 5..7 with k in {1, 3, 5}, m fixed at 3
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49466
// 3x4 scalar-imagic kernel: n in {8, 12} with k in {1, 3, 5}, m fixed at 3
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49483
// 3x4 scalar-imagic kernel: cm_stride set to 7, k in {1, 3, 5}, every
// n in [1, 4] and m in [1, 3], running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
49503
// 3x4 scalar-imagic kernel: full 3x4 tile with ks set to 3, a_offset set
// to 17 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.a_offset(17);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49519
// 3x4 scalar-imagic kernel: full 3x4 tile with ks set to 3 and a_offset
// set to 17, sweeping k in {1, 3, 5} and zero_index over 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(4).k(k_size);
      tester.ks(3);
      tester.a_offset(17);
      tester.zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49538
// 3x4 scalar-imagic kernel: full 3x4 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49551
// 3x4 scalar-imagic kernel: full 3x4 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49564
// 3x4 scalar-imagic kernel: full 3x4 tile, k = 1, with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49577
49578
// 4x2 scalar-imagic kernel: full 4x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49590
// 4x2 scalar-imagic kernel: full 4x2 tile, k = 1, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
49603
// 4x2 scalar-imagic kernel: k = 1 over every n in [1, 2] and m in [1, 4],
// running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
49620
// 4x2 scalar-imagic kernel: k = 1, n fixed at 2, m in [1, 4], running a
// single iteration per value of m.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(m_size).n(2).k(1);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49635
// 4x2 scalar-imagic kernel: k = 1, m fixed at 4, n in [1, 2], running a
// single iteration per value of n.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(n_size).k(1);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
49650
// Full tile (m=4, n=2) with k swept over 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
49664
// k in 2..9 crossed with every shape 1 <= n <= 2, 1 <= m <= 4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
49683
// n above nr (the loop yields only n=3), m=4, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
49699
// n above nr (the loop yields only n=3), m=4, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
49716
// n above nr (the loop yields only n=3), k in {1, 3, 5}, m swept over 1..4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
49735
// n in {4, 6} (multiples of nr=2), m=4, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
49751
// n in {4, 6} (multiples of nr=2), m=4, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
49768
// n in {4, 6}, k in {1, 3, 5}, m swept over 1..4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
49787
// Full tile (m=4, n=2) with ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
49802
// ks set to 3, k in {1, 3, 5}, every shape 1 <= n <= 2, 1 <= m <= 4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
49822
// n above nr (the loop yields only n=3), m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
49839
// n in {4, 6}, m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
49856
// cm_stride set to 5, k in {1, 3, 5}, every shape 1 <= n <= 2, 1 <= m <= 4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
49876
// Full tile (m=4, n=2), ks set to 3 and a_offset set to 23, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .a_offset(23)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
49892
// Full tile with ks=3 and a_offset=23, sweeping zero_index over 0..3 for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(kc)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
49911
// Full tile (m=4, n=2), k=1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
49924
// Full tile (m=4, n=2), k=1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
49937
// Full tile (m=4, n=2), k=1, with cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
49950
49951
// Full tile (m=4, n=4 matching mr=4, nr=4) with k=1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
49963
// Full tile (m=4, n=4), k=1, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
49976
// k=1 for every shape with 1 <= n <= 4 and 1 <= m <= 4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
49993
// k=1 and n=4, sweeping m over 1..4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
50008
// k=1 and m=4, sweeping n over 1..4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
50023
// Full tile (m=4, n=4) with k swept over 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
50037
// k in 2..9 crossed with every shape 1 <= n <= 4, 1 <= m <= 4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
50056
// n above nr (n in 5..7), m=4, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
50072
// n above nr (n in 5..7), m=4, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
50089
// n above nr (n in 5..7), k in {1, 3, 5}, m swept over 1..4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
50108
// n in {8, 12} (multiples of nr=4), m=4, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
50124
// n in {8, 12} (multiples of nr=4), m=4, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
50141
// n in {8, 12}, k in {1, 3, 5}, m swept over 1..4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
50160
// Full tile (m=4, n=4) with ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
50175
// ks set to 3, k in {1, 3, 5}, every shape 1 <= n <= 4, 1 <= m <= 4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
50195
// n above nr (n in 5..7), m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
50212
// n in {8, 12}, m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
50229
// cm_stride set to 7, k in {1, 3, 5}, every shape 1 <= n <= 4, 1 <= m <= 4, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
50249
// Full tile (m=4, n=4), ks set to 3 and a_offset set to 23, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(23)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
50265
// Full tile with ks=3 and a_offset=23, sweeping zero_index over 0..3 for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
50284
// Full tile (m=4, n=4), k=1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
50297
// Full tile (m=4, n=4), k=1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
50310
// Full tile (m=4, n=4), k=1, with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
50323