1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/f32-ppmm-minmax.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 4x8 NEON kernel through GemmMicrokernelTester with m=4, n=8, k=1.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
40
// m=4, n=8, k=1 problem with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
54
// m=4, n=8, k=1 problem with a_stride set to 3.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
68
// Sweeps n over [1, 8] and m over [1, 4] at k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
86
// Sweeps m over [1, 4] at n=8, k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
102
// Sweeps n over [1, 8] at m=4, k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
118
// Sweeps k over [2, 9] at m=4, n=8.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
133
// Sweeps k over [2, 9], n over [1, 8] and m over [1, 4], with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
153
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=4.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
170
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=4, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
188
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=4, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
206
// Sweeps n over [9, 15], k over {1, 3, 5} and m over [1, 4], with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
226
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=4.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
243
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=4, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
261
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=4, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
279
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over [1, 4], with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
299
// Sweeps k over {1, 3, 5}, n over [1, 8] and m over [1, 4], with cm_stride set
// to 11 and iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
320
// m=4, n=8, k=1 problem with qmin set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
334
// m=4, n=8, k=1 problem with qmax set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
348
// m=4, n=8, k=1 problem with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
362 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
363
364
365 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 4x8 NEON-FMA kernel through GemmMicrokernelTester with m=4, n=8, k=1.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
378
// m=4, n=8, k=1 problem with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
392
// m=4, n=8, k=1 problem with a_stride set to 3.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
406
// Sweeps n over [1, 8] and m over [1, 4] at k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
424
// Sweeps m over [1, 4] at n=8, k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
440
// Sweeps n over [1, 8] at m=4, k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
456
// Sweeps k over [2, 9] at m=4, n=8.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
471
// Sweeps k over [2, 9], n over [1, 8] and m over [1, 4], with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
491
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=4.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
508
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=4, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
526
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=4, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
544
// Sweeps n over [9, 15], k over {1, 3, 5} and m over [1, 4], with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
564
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=4.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
581
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=4, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
599
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=4, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
617
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over [1, 4], with iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
637
// Sweeps k over {1, 3, 5}, n over [1, 8] and m over [1, 4], with cm_stride set
// to 11 and iterations(1).
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
658
// m=4, n=8, k=1 problem with qmin set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
672
// m=4, n=8, k=1 problem with qmax set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
686
// m=4, n=8, k=1 problem with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
700 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
701
702
703 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 8x8 NEON kernel through GemmMicrokernelTester with m=8, n=8, k=1.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
716
// m=8, n=8, k=1 problem with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
730
// m=8, n=8, k=1 problem with a_stride set to 3.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
744
// Sweeps n over [1, 8] and m over [1, 8] at k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
762
// Sweeps m over [1, 8] at n=8, k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
778
// Sweeps n over [1, 8] at m=8, k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
794
// Sweeps k over [2, 9] at m=8, n=8.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(8).k(k_dim)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
809
// Sweeps k over [2, 9], n over [1, 8] and m over [1, 8], with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
829
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=8.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
846
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=8, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
864
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=8, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
882
// Sweeps n over [9, 15], k over {1, 3, 5} and m over [1, 8], with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
902
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=8.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
919
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=8, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
937
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=8, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
955
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over [1, 8], with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
975
// Sweeps k over {1, 3, 5}, n over [1, 8] and m over [1, 8], with cm_stride set
// to 11 and iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
996
// m=8, n=8, k=1 problem with qmin set to 128.
TEST(F32_PPMM_MINMAX_8X8__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
1010
// m=8, n=8, k=1 problem with qmax set to 128.
TEST(F32_PPMM_MINMAX_8X8__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
1024
// m=8, n=8, k=1 problem with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon,
            xnn_init_f32_minmax_scalar_params);
}
1038 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1039
1040
1041 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 8x8 NEON-FMA kernel through GemmMicrokernelTester with m=8, n=8, k=1.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
1054
// m=8, n=8, k=1 problem with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
1068
// m=8, n=8, k=1 problem with a_stride set to 3.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
1082
// Sweeps n over [1, 8] and m over [1, 8] at k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1100
// Sweeps m over [1, 8] at n=8, k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
1116
// Sweeps n over [1, 8] at m=8, k=1, with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
1132
// Sweeps k over [2, 9] at m=8, n=8.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(8).k(k_dim)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
1147
// Sweeps k over [2, 9], n over [1, 8] and m over [1, 8], with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1167
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=8.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1184
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=8, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1202
// Sweeps n over [9, 15] and k over {1, 3, 5} at m=8, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1220
// Sweeps n over [9, 15], k over {1, 3, 5} and m over [1, 8], with iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1240
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=8.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1257
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=8, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1275
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=8, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1293
// n in {16, 24}, k in {1, 3, 5}, m over [1, 8] (mr=8, nr=8); iterations(1) per shape.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1313
// k in {1, 3, 5}, n over [1, 8], m over [1, 8], using cm_stride(11) and iterations(1).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1334
// Single m=8, n=8, k=1 run with qmin(128).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
}
1348
// Single m=8, n=8, k=1 run with qmax(128).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
}
1362
// Single m=8, n=8, k=1 run with cm_stride(11).
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
}
1376 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1377
1378
1379 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m=4, n=8, k=1 run (mr=4, nr=8).
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
}
1392
// Single m=4, n=8, k=1 run with cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
}
1406
// Single m=4, n=8, k=1 run with a_stride(3).
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
}
1420
// k=1 with n over [1, 8] and m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
1438
// k=1, n=8 with m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
  }
}
1454
// k=1, m=4 with n over [1, 8]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
  }
}
1470
// m=4, n=8 with k over [2, 9].
TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
  }
}
1485
// k over [2, 9], n over [1, 8], m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
1505
// m=4 with n over [9, 15] and k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
1522
// m=4 with n over [9, 15] and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
1540
// m=4 with n over [9, 15] and k in {1, 3, 5}, using a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
1558
// n over [9, 15], k in {1, 3, 5}, m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
1578
// m=4 with n in {16, 24} and k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
1595
// m=4 with n in {16, 24} and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
1613
// m=4 with n in {16, 24} and k in {1, 3, 5}, using a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
1631
// n in {16, 24}, k in {1, 3, 5}, m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
1651
// k in {1, 3, 5}, n over [1, 8], m over [1, 4], using cm_stride(11) and iterations(1).
TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
1672
// Single m=4, n=8, k=1 run with qmin(128).
TEST(F32_PPMM_MINMAX_4X8__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
}
1686
// Single m=4, n=8, k=1 run with qmax(128).
TEST(F32_PPMM_MINMAX_4X8__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
}
1700
// Single m=4, n=8, k=1 run with cm_stride(11).
TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
}
1714 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1715
1716
1717 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single m=4, n=8, k=1 run (mr=4, nr=8).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
1729
// Single m=4, n=8, k=1 run with cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
1742
// Single m=4, n=8, k=1 run with a_stride(3).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
1755
// k=1 with n over [1, 8] and m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
1772
// k=1, n=8 with m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
1787
// k=1, m=4 with n over [1, 8]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
1802
// m=4, n=8 with k over [2, 9].
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
1816
// k over [2, 9], n over [1, 8], m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
1835
// m=4 with n over [9, 15] and k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
1851
// m=4 with n over [9, 15] and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
1868
// m=4 with n over [9, 15] and k in {1, 3, 5}, using a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
1885
// n over [9, 15], k in {1, 3, 5}, m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
1904
// m=4 with n in {16, 24} and k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
1920
// m=4 with n in {16, 24} and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
1937
// m=4 with n in {16, 24} and k in {1, 3, 5}, using a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
1954
// n in {16, 24}, k in {1, 3, 5}, m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
1973
// k in {1, 3, 5}, n over [1, 8], m over [1, 4], using cm_stride(11) and iterations(1).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
1993
// Single m=4, n=8, k=1 run with qmin(128).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2006
// Single m=4, n=8, k=1 run with qmax(128).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2019
// Single m=4, n=8, k=1 run with cm_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2032 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2033
2034
2035 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single m=4, n=8, k=1 run (mr=4, nr=8).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2047
// Single m=4, n=8, k=1 run with cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2060
// Single m=4, n=8, k=1 run with a_stride(3).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2073
// k=1 with n over [1, 8] and m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
2090
// k=1, n=8 with m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
2105
// k=1, m=4 with n over [1, 8]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
2120
// m=4, n=8 with k over [2, 9].
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
2134
// k over [2, 9], n over [1, 8], m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
2153
// m=4 with n over [9, 15] and k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
2169
// m=4 with n over [9, 15] and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
2186
// m=4 with n over [9, 15] and k in {1, 3, 5}, using a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
2203
// n over [9, 15], k in {1, 3, 5}, m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
2222
// m=4 with n in {16, 24} and k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
2238
// m=4 with n in {16, 24} and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
2255
// m=4 with n in {16, 24} and k in {1, 3, 5}, using a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
2272
// n in {16, 24}, k in {1, 3, 5}, m over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
2291
// k in {1, 3, 5}, n over [1, 8], m over [1, 4], using cm_stride(11) and iterations(1).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
2311
// Single m=4, n=8, k=1 run with qmin(128).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2324
// Single m=4, n=8, k=1 run with qmax(128).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2337
// Single m=4, n=8, k=1 run with cm_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
2350 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2351
2352
// Single m=2, n=4, k=1 run (mr=2, nr=4).
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
2364
// Single m=2, n=4, k=1 run with cn_stride(7).
TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
2377
// Single m=2, n=4, k=1 run with a_stride(3).
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
2390
// k=1 with n over [1, 4] and m over [1, 2]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2407
// k=1, n=4 with m over [1, 2]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
2422
// k=1, m=2 with n over [1, 4]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
2437
// m=2, n=4 with k over [2, 9].
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
2451
// k over [2, 9], n over [1, 4], m over [1, 2]; iterations(1) per shape.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2470
// m=2 with n over [5, 7] and k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2486
// m=2 with n over [5, 7] and k in {1, 3, 5}, using cn_stride(7).
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2503
// 2x4 scalar kernel, m = 2, a_stride(7): n in 5..7 combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2520
// 2x4 scalar kernel: n in 5..7, k in {1, 3, 5}, m in 1..2; iterations(1) per case.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2539
// 2x4 scalar kernel, m = 2: n in {8, 12} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2555
// 2x4 scalar kernel, m = 2, cn_stride(7): n in {8, 12} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2572
// 2x4 scalar kernel, m = 2, a_stride(7): n in {8, 12} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2589
// 2x4 scalar kernel: n in {8, 12}, k in {1, 3, 5}, m in 1..2; iterations(1) per case.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2608
// 2x4 scalar kernel, cm_stride(7): k in {1, 3, 5}, n in 1..4, m in 1..2; iterations(1) per case.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2628
// 2x4 scalar kernel, m = 2, n = 4, k = 1, with qmin(128) set on the tester.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmin) {
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
2641
// 2x4 scalar kernel, m = 2, n = 4, k = 1, with qmax(128) set on the tester.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmax) {
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
2654
// 2x4 scalar kernel, m = 2, n = 4, k = 1, with cm_stride(7) set on the tester.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm) {
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
2667
2668
// 3x3 scalar kernel, m = 3, n = 3, k = 1.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1) {
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
}
2680
// 3x3 scalar kernel, m = 3, n = 3, k = 1, with cn_stride(5) set on the tester.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cn) {
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .cn_stride(5);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
}
2693
// 3x3 scalar kernel, m = 3, n = 3, k = 1, with a_stride(3) set on the tester.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .a_stride(3);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
}
2706
// 3x3 scalar kernel, k = 1: every (n, m) with n in 1..3 and m in 1..3; iterations(1) per case.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 3; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2723
// 3x3 scalar kernel, k = 1, n = 3: m takes every value in 1..3, with iterations(1) per case.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(3).kr(1).sr(1)
      .m(m_dim).n(3).k(1)
      .iterations(1);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
2738
// 3x3 scalar kernel, k = 1, m = 3: n takes every value in 1..3, with iterations(1) per case.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 3; ++n_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(3).kr(1).sr(1)
      .m(3).n(n_dim).k(1)
      .iterations(1);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
2753
// 3x3 scalar kernel, m = 3, n = 3: k takes every value in [2, 10).
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(3).kr(1).sr(1)
      .m(3).n(3).k(k_dim);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
2767
// 3x3 scalar kernel: every (k, n, m) with k in [2, 10), n in 1..3, m in 1..3; iterations(1) per case.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 3; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(3).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2786
// 3x3 scalar kernel, m = 3: n in 4..5 combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3) {
  for (uint32_t n_dim = 4; n_dim < 6; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2802
// 3x3 scalar kernel, m = 3, cn_stride(5): n in 4..5 combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_cn) {
  for (uint32_t n_dim = 4; n_dim < 6; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(5);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2819
// 3x3 scalar kernel, m = 3, a_stride(7): n in 4..5 combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_a) {
  for (uint32_t n_dim = 4; n_dim < 6; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .a_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2836
// 3x3 scalar kernel: n in 4..5, k in {1, 3, 5}, m in 1..3; iterations(1) per case.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_subtile) {
  for (uint32_t n_dim = 4; n_dim < 6; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(3).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2855
// 3x3 scalar kernel, m = 3: n in {6, 9} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3) {
  for (uint32_t n_dim = 6; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2871
// 3x3 scalar kernel, m = 3, cn_stride(5): n in {6, 9} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_cn) {
  for (uint32_t n_dim = 6; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(5);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2888
// 3x3 scalar kernel, m = 3, a_stride(7): n in {6, 9} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_a) {
  for (uint32_t n_dim = 6; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .a_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2905
// 3x3 scalar kernel: n in {6, 9}, k in {1, 3, 5}, m in 1..3; iterations(1) per case.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_subtile) {
  for (uint32_t n_dim = 6; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(3).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2924
// 3x3 scalar kernel, cm_stride(5): k in {1, 3, 5}, n in 1..3, m in 1..3; iterations(1) per case.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 3; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(3).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2944
// 3x3 scalar kernel, m = 3, n = 3, k = 1, with qmin(128) set on the tester.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmin) {
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .qmin(128);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
}
2957
// 3x3 scalar kernel, m = 3, n = 3, k = 1, with qmax(128) set on the tester.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmax) {
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .qmax(128);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
}
2970
// 3x3 scalar kernel, m = 3, n = 3, k = 1, with cm_stride(5) set on the tester.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm) {
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .cm_stride(5);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
}
2983
2984
// 4x2 scalar kernel, m = 4, n = 2, k = 1.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
}
2996
// 4x2 scalar kernel, m = 4, n = 2, k = 1, with cn_stride(5) set on the tester.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cn) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cn_stride(5);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
}
3009
// 4x2 scalar kernel, m = 4, n = 2, k = 1, with a_stride(3) set on the tester.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .a_stride(3);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
}
3022
// 4x2 scalar kernel, k = 1: every (n, m) with n in 1..2 and m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3039
// 4x2 scalar kernel, k = 1, n = 2: m takes every value in 1..4, with iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(m_dim).n(2).k(1)
      .iterations(1);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
3054
// 4x2 scalar kernel, k = 1, m = 4: n takes every value in 1..2, with iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(n_dim).k(1)
      .iterations(1);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
3069
// 4x2 scalar kernel, m = 4, n = 2: k takes every value in [2, 10).
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_dim);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
3083
// 4x2 scalar kernel: every (k, n, m) with k in [2, 10), n in 1..2, m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3102
// 4x2 scalar kernel, m = 4: n starts at 3 and stops before 4 (so only n = 3), with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3118
// 4x2 scalar kernel, m = 4, cn_stride(5): n starts at 3 and stops before 4, with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(5);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3135
// 4x2 scalar kernel, m = 4, a_stride(7): n starts at 3 and stops before 4, with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3152
// 4x2 scalar kernel: n starts at 3 and stops before 4, k in {1, 3, 5}, m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3171
// 4x2 scalar kernel, m = 4: n in {4, 6} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3187
// 4x2 scalar kernel, m = 4, cn_stride(5): n in {4, 6} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(5);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3204
// 4x2 scalar kernel, m = 4, a_stride(7): n in {4, 6} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3221
// 4x2 scalar kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3240
// 4x2 scalar kernel, cm_stride(5): k in {1, 3, 5}, n in 1..2, m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3260
// 4x2 scalar kernel, m = 4, n = 2, k = 1, with qmin(128) set on the tester.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmin) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .qmin(128);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
}
3273
// 4x2 scalar kernel, m = 4, n = 2, k = 1, with qmax(128) set on the tester.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmax) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .qmax(128);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
}
3286
// 4x2 scalar kernel, m = 4, n = 2, k = 1, with cm_stride(5) set on the tester.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cm_stride(5);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
}
3299
3300
// 4x4 scalar kernel, m = 4, n = 4, k = 1.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
3312
// 4x4 scalar kernel, m = 4, n = 4, k = 1, with cn_stride(7) set on the tester.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cn) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
3325
// 4x4 scalar kernel, m = 4, n = 4, k = 1, with a_stride(3) set on the tester.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .a_stride(3);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
3338
// 4x4 scalar kernel, k = 1: every (n, m) with n in 1..4 and m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3355
// 4x4 scalar kernel, k = 1, n = 4: m takes every value in 1..4, with iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(m_dim).n(4).k(1)
      .iterations(1);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
3370
// 4x4 scalar kernel, k = 1, m = 4: n takes every value in 1..4, with iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(n_dim).k(1)
      .iterations(1);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
3385
// 4x4 scalar kernel, m = 4, n = 4: k takes every value in [2, 10).
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_dim);
    tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
3399
// 4x4 scalar kernel: every (k, n, m) with k in [2, 10), n in 1..4, m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3418
// 4x4 scalar kernel, m = 4: n in 5..7 combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3434
// 4x4 scalar kernel, m = 4, cn_stride(7): n in 5..7 combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3451
// 4x4 scalar kernel, m = 4, a_stride(7): n in 5..7 combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3468
// 4x4 scalar kernel: n in 5..7, k in {1, 3, 5}, m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3487
// 4x4 scalar kernel, m = 4: n in {8, 12} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3503
// 4x4 scalar kernel, m = 4, cn_stride(7): n in {8, 12} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3520
// 4x4 scalar kernel, m = 4, a_stride(7): n in {8, 12} combined with k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7);
      tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3537
// 4x4 scalar kernel: n in {8, 12}, k in {1, 3, 5}, m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3556
// 4x4 scalar kernel, cm_stride(7): k in {1, 3, 5}, n in 1..4, m in 1..4; iterations(1) per case.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1);
        tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3576
// 4x4 scalar kernel, m = 4, n = 4, k = 1, with qmin(128) set on the tester.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmin) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmin(128);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
3589
// 4x4 scalar kernel, m = 4, n = 4, k = 1, with qmax(128) set on the tester.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmax) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmax(128);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
3602
// 4x4 scalar kernel, m = 4, n = 4, k = 1, with cm_stride(7) set on the tester.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm) {
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7);
  tester.Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
3615