1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/f32-gemm.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run: m=1, n=8, k=1.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
}
39
// Single run: m=1, n=8, k=1 with cn_stride set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
}
52
// Single run: m=1, n=8, k=1 with a_stride set to 3.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
}
65
// Sweeps n over 1..8 and m over 1..1 at k=1, one iteration per configuration.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
82
// Sweeps m over 1..1 with n=8, k=1, one iteration per configuration.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}
97
// Sweeps n over 1..8 with m=1, k=1, one iteration per configuration.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}
112
// Sweeps k over 2..9 with m=1, n=8.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}
126
// Sweeps k over 2..9 with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .a_stride(11)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}
141
// Sweeps k over 2..9, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
160
// Sweeps n over 9..15 and k over {1, 3, 5} with m=1.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
176
// Sweeps n over 9..15 and k over {1, 3, 5} with m=1 and cn_stride set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
193
// Sweeps n over 9..15 and k over {1, 3, 5} with m=1 and a_stride set to 7.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
210
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..1, one iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
229
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=1.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
245
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=1 and cn_stride set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
262
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=1 and a_stride set to 7.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
279
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..1, one iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
298
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..1 with cm_stride set to
// 11, one iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
318
// Single run: m=1, n=8, k=1 with cm_stride set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_loadsplat);
}
331 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
332
333
334 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run: m=1, n=8, k=4.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
}
346
// Single run: m=1, n=8, k=4 with cn_stride set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
}
359
// Single run: m=1, n=8, k=4 with a_stride set to 7.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
}
372
// Sweeps n over 1..8 and m over 1..1 at k=4, one iteration per configuration.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}
389
// Sweeps m over 1..1 with n=8, k=4, one iteration per configuration.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
  }
}
404
// Sweeps n over 1..8 with m=1, k=4, one iteration per configuration.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
  }
}
419
// Sweeps k over 1..3 with m=1, n=8.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
  }
}
433
// Sweeps k over 1..3 with m=1, n=8 and a_stride set to 7.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
  }
}
448
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
467
// Sweeps k over 5..7 with m=1, n=8.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
  }
}
481
// Sweeps k over 5..7 with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .a_stride(11)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
  }
}
496
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
515
// Sweeps k over 8..40 in steps of 4 with m=1, n=8.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
  }
}
529
// Sweeps k over 8..40 in steps of 4 with m=1, n=8 and a_stride set to 43.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .a_stride(43)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
  }
}
544
// Sweeps k over 8..40 in steps of 4, n over 1..8 and m over 1..1, one
// iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
563
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}
579
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1 and cn_stride set
// to 11.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}
596
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1 and a_stride set
// to 23.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}
613
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..1, one iteration
// each.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
632
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}
648
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1 and cn_stride
// set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}
665
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1 and a_stride
// set to 23.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}
682
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..1, one
// iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
701
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..1 with cm_stride
// set to 11, one iteration each.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
721
// Single run: m=1, n=8, k=4 with cm_stride set to 11.
TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
}
734 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
735
736
737 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run: m=1, n=8, k=4.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
}
749
// Single run: m=1, n=8, k=4 with cn_stride set to 11.
TEST(F32_GEMM_1X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
}
762
// Single run: m=1, n=8, k=4 with a_stride set to 7.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
}
775
// Sweeps n over 1..8 and m over 1..1 at k=4, one iteration per configuration.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
    }
  }
}
792
// Sweeps m over 1..1 with n=8, k=4, one iteration per configuration.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
  }
}
807
// Sweeps n over 1..8 with m=1, k=4, one iteration per configuration.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
  }
}
822
// Sweeps k over 1..3 with m=1, n=8.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
  }
}
836
// Sweeps k over 1..3 with m=1, n=8 and a_stride set to 7.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_lt_4_strided_a) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
  }
}
851
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
870
// Sweeps k over 5..7 with m=1, n=8.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
  }
}
884
// Sweeps k over 5..7 with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .a_stride(11)
        .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
  }
}
899
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
918
// Sweeps k over 8..40 in steps of 4 with m=1, n=8.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
  }
}
932
// Sweeps k over 8..40 in steps of 4 with m=1, n=8 and a_stride set to 43.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_div_4_strided_a) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .a_stride(43)
        .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
  }
}
947
// Sweeps k over 8..40 in steps of 4, n over 1..8 and m over 1..1, one
// iteration each.
TEST(F32_GEMM_1X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
966
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1.
TEST(F32_GEMM_1X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
    }
  }
}
982
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1 and cn_stride set
// to 11.
TEST(F32_GEMM_1X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
    }
  }
}
999
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1 and a_stride set
// to 23.
TEST(F32_GEMM_1X8S4__WASMSIMD, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
    }
  }
}
1016
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..1, one iteration
// each.
TEST(F32_GEMM_1X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
1035
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1.
TEST(F32_GEMM_1X8S4__WASMSIMD, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
    }
  }
}
1051
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1 and cn_stride
// set to 11.
TEST(F32_GEMM_1X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
    }
  }
}
1068
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1 and a_stride
// set to 23.
TEST(F32_GEMM_1X8S4__WASMSIMD, n_div_8_strided_a) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
    }
  }
}
1085
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..1, one
// iteration each.
TEST(F32_GEMM_1X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
1104
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..1 with cm_stride
// set to 11, one iteration each.
TEST(F32_GEMM_1X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
1124
// Single run: m=1, n=8, k=4 with cm_stride set to 11.
TEST(F32_GEMM_1X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_ukernel_1x8s4__wasmsimd);
}
1137 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1138
1139
1140 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run: m=4, n=8, k=4.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
}
1152
// Single run: m=4, n=8, k=4 with cn_stride set to 11.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
}
1165
// Single run: m=4, n=8, k=4 with a_stride set to 7.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
}
1178
// Sweeps n over 1..8 and m over 1..4 at k=4, one iteration per configuration.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1195
// Sweeps m over 1..4 with n=8, k=4, one iteration per configuration.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
  }
}
1210
// Sweeps n over 1..8 with m=4, k=4, one iteration per configuration.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
  }
}
1225
// Sweeps k over 1..3 with m=4, n=8.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
  }
}
1239
// Sweeps k over 1..3 with m=4, n=8 and a_stride set to 7.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
  }
}
1254
// Sweeps k over 1..3, n over 1..8 and m over 1..4, one iteration each.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1273
// Sweeps k over 5..7 with m=4, n=8.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
  }
}
1287
// Sweeps k over 5..7 with m=4, n=8 and a_stride set to 11.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .a_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
  }
}
1302
// Sweeps k over 5..7, n over 1..8 and m over 1..4, one iteration each.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1321
// Sweeps k over 8..40 in steps of 4 with m=4, n=8.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
  }
}
1335
// Sweeps k over 8..40 in steps of 4 with m=4, n=8 and a_stride set to 43.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .a_stride(43)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
  }
}
1350
// Sweeps k over 8..40 in steps of 4, n over 1..8 and m over 1..4, one
// iteration each.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1369
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=4.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1385
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=4 and cn_stride set
// to 11.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1402
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=4 and a_stride set
// to 23.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1419
// n = 9..15, k in {1, 6, 11, 16}, m in [1, 4]; one iteration per case.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1438
// n = 16 and 24 with m=4; k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1454
// n = 16 and 24 with m=4 and cn_stride(11); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1471
// n = 16 and 24 with m=4 and a_stride(23); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1488
// n = 16 and 24, k in {1, 6, 11, 16}, m in [1, 4]; one iteration per case.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1507
// cm_stride(11) for k in {1, 6, 11, 16}, n in [1, 8], m in [1, 4]; one iteration per case.
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1527
// Single case: m=4, n=8, k=4 with cm_stride(11).
TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
}
1540 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1541
1542
1543 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: m=5, n=8 (equal to mr/nr), k=4.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4);
  tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
}
1555
// Single case: m=5, n=8, k=4 with cn_stride(11).
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
}
1568
// Single case: m=5, n=8, k=4 with a_stride(7).
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
}
1581
// k=4 for every n in [1, 8] and m in [1, 5]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(4).iterations(1);
      tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1598
// k=4, n=8; m varies over [1, 5]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(m_size).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
  }
}
1613
// k=4, m=5; n varies over [1, 8]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(n_size).k(4).iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
  }
}
1628
// m=5, n=8; k = 1, 2, 3.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_size);
    tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
  }
}
1642
// m=5, n=8; k = 1, 2, 3 with a_stride(7).
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_size).a_stride(7);
    tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
  }
}
1657
// k = 1, 2, 3 for every n in [1, 8] and m in [1, 5]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1676
// m=5, n=8; k = 5, 6, 7.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_size);
    tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
  }
}
1690
// m=5, n=8; k = 5, 6, 7 with a_stride(11).
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_size).a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
  }
}
1705
// k = 5, 6, 7 for every n in [1, 8] and m in [1, 5]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1724
// m=5, n=8; k runs over 8, 12, ..., 40.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_size);
    tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
  }
}
1738
// m=5, n=8; k runs over 8, 12, ..., 40 with a_stride(43).
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_size).a_stride(43);
    tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
  }
}
1753
// k = 8, 12, ..., 40 for every n in [1, 8] and m in [1, 5]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1772
// n = 9..15 with m=5; k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1788
// n = 9..15 with m=5 and cn_stride(11); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1805
// n = 9..15 with m=5 and a_stride(23); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_size).k(k_size).a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1822
// n = 9..15, k in {1, 6, 11, 16}, m in [1, 5]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1841
// n = 16 and 24 with m=5; k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1857
// n = 16 and 24 with m=5 and cn_stride(11); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1874
// n = 16 and 24 with m=5 and a_stride(23); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_size).k(k_size).a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1891
// n = 16 and 24, k in {1, 6, 11, 16}, m in [1, 5]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1910
// cm_stride(11) for k in {1, 6, 11, 16}, n in [1, 8], m in [1, 5]; one iteration per case.
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1930
// Single case: m=5, n=8, k=4 with cm_stride(11).
TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
}
1943 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1944
1945
1946 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: m=6, n=8 (equal to mr/nr), k=1.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
}
1958
// Single case: m=6, n=8, k=1 with cn_stride(11).
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
}
1971
// Single case: m=6, n=8, k=1 with a_stride(3).
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).a_stride(3);
  tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
}
1984
// k=1 for every n in [1, 8] and m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1).iterations(1);
      tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2001
// k=1, n=8; m varies over [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(m_size).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2016
// k=1, m=6; n varies over [1, 8]; one iteration per case.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(n_size).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2031
// m=6, n=8; k = 2..9.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_size);
    tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2045
// m=6, n=8; k = 2..9 with a_stride(11).
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_size).a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2060
// k = 2..9 for every n in [1, 8] and m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2079
// n = 9..15 with m=6; k takes the values 1, 3, 5.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2095
// n = 9..15 with m=6 and cn_stride(11); k takes the values 1, 3, 5.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2112
// n = 9..15 with m=6 and a_stride(7); k takes the values 1, 3, 5.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2129
// n = 9..15, k in {1, 3, 5}, m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2148
// n = 16 and 24 with m=6; k takes the values 1, 3, 5.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2164
// n = 16 and 24 with m=6 and cn_stride(11); k takes the values 1, 3, 5.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2181
// n = 16 and 24 with m=6 and a_stride(7); k takes the values 1, 3, 5.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2198
// n = 16 and 24, k in {1, 3, 5}, m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2217
// cm_stride(11) for k in {1, 3, 5}, n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2237
// Single case: m=6, n=8, k=1 with cm_stride(11).
TEST(F32_GEMM_6X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).cm_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat);
}
2250 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2251
2252
2253 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: m=6, n=8 (equal to mr/nr), k=4; sr is 4 for this kernel.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
}
2265
// Single case: m=6, n=8, k=4 with cn_stride(11).
TEST(F32_GEMM_6X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
}
2278
// Single case: m=6, n=8, k=4 with a_stride(7).
TEST(F32_GEMM_6X8S4__WASMSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
}
2291
// k=4 for every n in [1, 8] and m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(m_size).n(n_size).k(4).iterations(1);
      tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2308
// k=4, n=8; m varies over [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(m_size).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
  }
}
2323
// k=4, m=6; n varies over [1, 8]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(n_size).k(4).iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
  }
}
2338
// m=6, n=8; k = 1, 2, 3.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_size);
    tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
  }
}
2352
// m=6, n=8; k = 1, 2, 3 with a_stride(7).
TEST(F32_GEMM_6X8S4__WASMSIMD, k_lt_4_strided_a) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_size).a_stride(7);
    tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
  }
}
2367
// k = 1, 2, 3 for every n in [1, 8] and m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2386
// m=6, n=8; k = 5, 6, 7.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_size);
    tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
  }
}
2400
// m=6, n=8; k = 5, 6, 7 with a_stride(11).
TEST(F32_GEMM_6X8S4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_size).a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
  }
}
2415
// k = 5, 6, 7 for every n in [1, 8] and m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2434
// m=6, n=8; k runs over 8, 12, ..., 40.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_size);
    tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
  }
}
2448
// m=6, n=8; k runs over 8, 12, ..., 40 with a_stride(43).
TEST(F32_GEMM_6X8S4__WASMSIMD, k_div_4_strided_a) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_size).a_stride(43);
    tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
  }
}
2463
// k = 8, 12, ..., 40 for every n in [1, 8] and m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2482
// n = 9..15 with m=6; k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_6X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2498
// n = 9..15 with m=6 and cn_stride(11); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_6X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2515
// n = 9..15 with m=6 and a_stride(23); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_6X8S4__WASMSIMD, n_gt_8_strided_a) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_size).k(k_size).a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2532
// n = 9..15, k in {1, 6, 11, 16}, m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2551
// n = 16 and 24 with m=6; k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_6X8S4__WASMSIMD, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2567
// n = 16 and 24 with m=6 and cn_stride(11); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_6X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2584
// n = 16 and 24 with m=6 and a_stride(23); k takes the values 1, 6, 11, 16.
TEST(F32_GEMM_6X8S4__WASMSIMD, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_size).k(k_size).a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2601
// n = 16 and 24, k in {1, 6, 11, 16}, m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2620
// cm_stride(11) for k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMM_6X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2640
// Single case: m=6, n=8, k=4 with cm_stride(11).
TEST(F32_GEMM_6X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_6x8s4__wasmsimd);
}
2653 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2654
2655
2656 #if XNN_ARCH_WASMRELAXEDSIMD
// Single case: m=1, n=8 (equal to mr/nr), k=4.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
2668
// Single case: m=1, n=8, k=4 with cn_stride(11).
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
2681
// Single case: m=1, n=8, k=4 with a_stride(7).
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
2694
// k=4 for every n in [1, 8]; the m loop covers only m=1 since mr is 1. One iteration per case.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(4).iterations(1);
      tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
2711
// k=4, n=8; the m loop covers only m=1 since mr is 1. One iteration per case.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(m_size).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
2726
// k=4, m=1; n varies over [1, 8]; one iteration per case.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(n_size).k(4).iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
2741
// m=1, n=8; k = 1, 2, 3.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
2755
// m=1, n=8; k = 1, 2, 3 with a_stride(7).
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_size).a_stride(7);
    tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
2770
// Sweeps k over 1..3, n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
2789
// Sweeps k over 5..7 (k < 8) with m=1, n=8.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
2803
// Sweeps k over 5..7 (k < 8) with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
2818
// Sweeps k over 5..7, n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
2837
// Sweeps k over 8, 12, ..., 40 (step 4) with m=1, n=8.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
2851
// Sweeps k over 8, 12, ..., 40 (step 4) with m=1, n=8 and a_stride set to 43.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(43);
    tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
2866
// Sweeps k over 8..40 (step 4), n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
2885
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=1.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc);
      tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
2901
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=1 and cn_stride set to 11.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
2918
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=1 and a_stride set to 23.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
2935
// Sweeps n over 9..15, k over 1..20 (step 5) and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
2954
// Sweeps n over 16 and 24 (step 8) and k over 1, 6, 11, 16 (step 5) with m=1.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc);
      tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
2970
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=1 and cn_stride set to 11.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
2987
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=1 and a_stride set to 23.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3004
// Sweeps n over 16 and 24, k over 1..20 (step 5) and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3023
// Sweeps k over 1..20 (step 5), n over 1..8 and m over 1..1 with cm_stride set to 11
// and iterations(1) per configuration.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3043
// Single run: m=1, n=8, k=4 with cm_stride set to 11.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cm_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
3056 #endif // XNN_ARCH_WASMRELAXEDSIMD
3057
3058
3059 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run: m=1, n=8, k=4.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4);
  tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3071
// Single run: m=1, n=8, k=4 with cn_stride set to 11.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3084
// Single run: m=1, n=8, k=4 with a_stride set to 7.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .a_stride(7);
  tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3097
// Sweeps n over 1..8 and m over 1..1 at k=4, with iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1);
      tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3114
// Sweeps m over 1..1 with n=8, k=4, and iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3129
// Sweeps n over 1..8 with m=1, k=4, and iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(4)
        .iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3144
// Sweeps k over 1..3 (k < 4) with m=1, n=8.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3158
// Sweeps k over 1..3 (k < 4) with m=1, n=8 and a_stride set to 7.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(7);
    tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3173
// Sweeps k over 1..3, n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3192
// Sweeps k over 5..7 (k < 8) with m=1, n=8.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3206
// Sweeps k over 5..7 (k < 8) with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3221
// Sweeps k over 5..7, n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3240
// Sweeps k over 8, 12, ..., 40 (step 4) with m=1, n=8.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3254
// Sweeps k over 8, 12, ..., 40 (step 4) with m=1, n=8 and a_stride set to 43.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(43);
    tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3269
// Sweeps k over 8..40 (step 4), n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3288
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=1.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc);
      tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3304
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=1 and cn_stride set to 11.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3321
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=1 and a_stride set to 23.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3338
// Sweeps n over 9..15, k over 1..20 (step 5) and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3357
// Sweeps n over 16 and 24 (step 8) and k over 1, 6, 11, 16 (step 5) with m=1.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc);
      tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3373
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=1 and cn_stride set to 11.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3390
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=1 and a_stride set to 23.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3407
// Sweeps n over 16 and 24, k over 1..20 (step 5) and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3426
// Sweeps k over 1..20 (step 5), n over 1..8 and m over 1..1 with cm_stride set to 11
// and iterations(1) per configuration.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3446
// Single run: m=1, n=8, k=4 with cm_stride set to 11.
TEST(F32_GEMM_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cm_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3459 #endif // XNN_ARCH_WASMRELAXEDSIMD
3460
3461
3462 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run: m=3, n=8, k=4.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4);
  tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
3474
// Single run: m=3, n=8, k=4 with cn_stride set to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .cn_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
3487
// Single run: m=3, n=8, k=4 with a_stride set to 7.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .a_stride(7);
  tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
3500
// Sweeps n over 1..8 and m over 1..3 at k=4, with iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1);
      tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3517
// Sweeps m over 1..3 with n=8, k=4, and iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
3532
// Sweeps n over 1..8 with m=3, k=4, and iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(4)
        .iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
3547
// Sweeps k over 1..3 (k < 4) with m=3, n=8.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
3561
// Sweeps k over 1..3 (k < 4) with m=3, n=8 and a_stride set to 7.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .a_stride(7);
    tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
3576
// Sweeps k over 1..3, n over 1..8 and m over 1..3, with iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3595
// Sweeps k over 5..7 (k < 8) with m=3, n=8.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
3609
// Sweeps k over 5..7 (k < 8) with m=3, n=8 and a_stride set to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
3624
// Sweeps k over 5..7, n over 1..8 and m over 1..3, with iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3643
// Sweeps k over 8, 12, ..., 40 (step 4) with m=3, n=8.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
3657
// Sweeps k over 8, 12, ..., 40 (step 4) with m=3, n=8 and a_stride set to 43.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .a_stride(43);
    tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
3672
// Sweeps k over 8..40 (step 4), n over 1..8 and m over 1..3, with iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3691
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=3.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc);
      tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3707
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=3 and cn_stride set to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3724
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) with m=3 and a_stride set to 23.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3741
// Sweeps n over 9..15, k over 1..20 (step 5) and m over 1..3, with iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3760
// Sweeps n over 16 and 24 (step 8) and k over 1, 6, 11, 16 (step 5) with m=3.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc);
      tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3776
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=3 and cn_stride set to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11);
      tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3793
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=3 and a_stride set to 23.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(23);
      tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3810
// Sweeps n over 16 and 24, k over 1..20 (step 5) and m over 1..3, with iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3829
// Sweeps k over 1..20 (step 5), n over 1..8 and m over 1..3 with cm_stride set to 11
// and iterations(1) per configuration.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3849
// Single run: m=3, n=8, k=4 with cm_stride set to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .cm_stride(11);
  tester.Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
3862 #endif // XNN_ARCH_WASMRELAXEDSIMD
3863
3864
3865 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run: m=4, n=2, k=4.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4);
  tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
3877
// Single run: m=4, n=2, k=4 with cn_stride set to 5.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, strided_cn) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4)
      .cn_stride(5);
  tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
3890
// Single run: m=4, n=2, k=4 with a_stride set to 7.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4)
      .a_stride(7);
  tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
3903
// Sweeps n over 1..2 and m over 1..4 at k=4, with iterations(1) per configuration.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1);
      tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
3920
// Sweeps m over 1..4 with n=2, k=4, and iterations(1) per configuration.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(mc).n(2).k(4)
        .iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
3935
// Sweeps n over 1..2 with m=4, k=4, and iterations(1) per configuration.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(4)
        .iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
3950
// Sweeps k over 1..3 (k < 4) with m=4, n=2.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
3964
// Sweeps k over 1..3 (k < 4) with m=4, n=2 and a_stride set to 7.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc)
        .a_stride(7);
    tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
3979
// Sweeps k over 1..3, n over 1..2 and m over 1..4, with iterations(1) per configuration.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3998
// Sweeps k over 5..7 (k < 8) with m=4, n=2.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc);
    tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4012
// Sweeps k over 5..7 (k < 8) with m=4, n=2 and a_stride set to 11.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc)
        .a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4027
// Sweeps k over 5..7, n over 1..2 and m over 1..4, with iterations(1) per configuration.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4046
// Sweeps k = 8, 12, ..., 40 (multiples of 4) with m=4, n=2.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4060
// Sweeps k = 8, 12, ..., 40 with m=4, n=2 and a_stride(43).
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4075
// Sweeps k = 8, 12, ..., 40 over every m in 1..4 and n in 1..2, one iteration each.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4094
// Runs n = 3 (2 < n < 4) with m=4 for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4110
// Runs n = 3 with m=4 and cn_stride(5) for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4127
// Runs n = 3 with m=4 and a_stride(23) for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_strided_a) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4144
// Runs n = 3 for k = 1, 6, 11, 16 over every m in 1..4, one iteration each.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4163
// Runs n = 4 and 6 (multiples of 2) with m=4 for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4179
// Runs n = 4 and 6 with m=4 and cn_stride(5) for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4196
// Runs n = 4 and 6 with m=4 and a_stride(23) for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_strided_a) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4213
// Runs n = 4 and 6 for k = 1, 6, 11, 16 over every m in 1..4, one iteration each.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4232
// Applies cm_stride(5) for k = 1, 6, 11, 16 over every m in 1..4 and n in 1..2,
// one iteration each.
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4252
// Single m=4, n=2, k=4 run with cm_stride(5).
TEST(F32_GEMM_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4)
      .cm_stride(5)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4265 #endif // XNN_ARCH_WASMRELAXEDSIMD
4266
4267
4268 #if XNN_ARCH_WASMRELAXEDSIMD
// Single m=4, n=8, k=4 run.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
4280
// Single m=4, n=8, k=4 run with cn_stride(11).
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
4293
// Single m=4, n=8, k=4 run with a_stride(7).
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
4306
// Runs k=4 over every n in 1..8 and m in 1..4, one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4323
// Runs k=4, n=8 over every m in 1..4, one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4338
// Runs k=4, m=4 over every n in 1..8, one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4353
// Sweeps k = 1..3 (k < 4) with m=4, n=8.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4367
// Sweeps k = 1..3 with m=4, n=8 and a_stride(7).
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4382
// Sweeps k = 1..3 over every m in 1..4 and n in 1..8, one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4401
// Sweeps k = 5..7 (4 < k < 8) with m=4, n=8.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4415
// Sweeps k = 5..7 with m=4, n=8 and a_stride(11).
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4430
// Sweeps k = 5..7 over every m in 1..4 and n in 1..8, one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4449
// Sweeps k = 8, 12, ..., 40 (multiples of 4) with m=4, n=8.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4463
// Sweeps k = 8, 12, ..., 40 with m=4, n=8 and a_stride(43).
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4478
// Sweeps k = 8, 12, ..., 40 over every m in 1..4 and n in 1..8, one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4497
// Runs n = 9..15 (8 < n < 16) with m=4 for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4513
// Runs n = 9..15 with m=4 and cn_stride(11) for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4530
// Runs n = 9..15 with m=4 and a_stride(23) for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4547
// Runs n = 9..15 for k = 1, 6, 11, 16 over every m in 1..4, one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4566
// Runs n = 16 and 24 (multiples of 8) with m=4 for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4582
// Runs n = 16 and 24 with m=4 and cn_stride(11) for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4599
// Runs n = 16 and 24 with m=4 and a_stride(23) for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4616
// Runs n = 16 and 24 for k = 1, 6, 11, 16 over every m in 1..4, one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4635
// Applies cm_stride(11) for k = 1, 6, 11, 16 over every m in 1..4 and n in 1..8,
// one iteration each.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4655
// Single m=4, n=8, k=4 run with cm_stride(11).
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
4668 #endif // XNN_ARCH_WASMRELAXEDSIMD
4669
4670
4671 #if XNN_ARCH_WASMRELAXEDSIMD
// Single m=5, n=8, k=4 run.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
4683
// Single m=5, n=8, k=4 run with cn_stride(11).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
4696
// Single m=5, n=8, k=4 run with a_stride(7).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
4709
// Runs k=4 over every n in 1..8 and m in 1..5, one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4726
// Runs k=4, n=8 over every m in 1..5, one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
4741
// Runs k=4, m=5 over every n in 1..8, one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
4756
// Sweeps k = 1..3 (k < 4) with m=5, n=8.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
4770
// Sweeps k = 1..3 with m=5, n=8 and a_stride(7).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
4785
// Sweeps k = 1..3 over every m in 1..5 and n in 1..8, one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4804
// Sweeps k = 5..7 (4 < k < 8) with m=5, n=8.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
4818
// Sweeps k = 5..7 with m=5, n=8 and a_stride(11).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
4833
// Sweeps k = 5..7 over every m in 1..5 and n in 1..8, one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4852
// Sweeps k = 8, 12, ..., 40 (multiples of 4) with m=5, n=8.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
4866
// Sweeps k = 8, 12, ..., 40 with m=5, n=8 and a_stride(43).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
4881
// Sweeps k = 8, 12, ..., 40 over every m in 1..5 and n in 1..8, one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4900
// Runs n = 9..15 (8 < n < 16) with m=5 for k = 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4916
// Runs n = 9..15 with m=5 and cn_stride(11) for k = 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4933
// Runs n = 9..15 with m=5 and a_stride(23) for k = 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4950
// Runs n = 9..15 for k = 1, 6, 11, 16 over every m in 1..5, one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4969
// Runs n = 16 and 24 (multiples of 8) with m=5 for k = 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4985
// Runs n = 16 and 24 with m=5 and cn_stride(11) for k = 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5002
// Runs n = 16 and 24 with m=5 and a_stride(23) for k = 1, 6, 11, 16.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5019
// Runs n = 16 and 24 for k = 1, 6, 11, 16 over every m in 1..5, one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5038
// Applies cm_stride(11) for k = 1, 6, 11, 16 over every m in 1..5 and n in 1..8,
// one iteration each.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5058
// Single m=5, n=8, k=4 run with cm_stride(11).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
5071 #endif // XNN_ARCH_WASMRELAXEDSIMD
5072
5073
5074 #if XNN_ARCH_WASMRELAXEDSIMD
// Single m=5, n=8, k=4 run.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5086
// Single m=5, n=8, k=4 run with cn_stride(11).
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5099
// Single m=5, n=8, k=4 run with a_stride(7).
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5112
// Runs k=4 over every n in 1..8 and m in 1..5, one iteration each.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5129
// Runs k=4, n=8 over every m in 1..5, one iteration each.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5144
// Runs k=4, m=5 over every n in 1..8, one iteration each.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5159
// Sweeps k = 1..3 (k < 4) with m=5, n=8.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5173
// Sweeps k = 1..3 with m=5, n=8 and a_stride(7).
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5188
// Sweeps k = 1..3 over every m in 1..5 and n in 1..8, one iteration each.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5207
// Sweeps k = 5..7 (4 < k < 8) with m=5, n=8.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5221
// Sweeps k = 5..7 with m=5, n=8 and a_stride(11).
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5236
// Sweeps k = 5..7 over every m in 1..5 and n in 1..8, one iteration each.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5255
// Sweeps k = 8, 12, ..., 40 (multiples of 4) with m=5, n=8.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5269
// Sweeps k = 8, 12, ..., 40 with m=5, n=8 and a_stride(43).
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5284
// Sweeps k = 8, 12, ..., 40 over every m in 1..5 and n in 1..8, one iteration each.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5303
// Runs n = 9..15 (8 < n < 16) with m=5 for k = 1, 6, 11, 16.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5319
// Exercises m=5 for n = 9..15 and k = 1, 6, 11, 16 with cn_stride=11.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5336
// Exercises m=5 for n = 9..15 and k = 1, 6, 11, 16 with a_stride=23.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5353
// Exercises n = 9..15, k = 1, 6, 11, 16 and every m = 1..5, with iterations(1).
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5372
// Exercises m=5 for n = 16, 24 and k = 1, 6, 11, 16.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5388
// Exercises m=5 for n = 16, 24 and k = 1, 6, 11, 16 with cn_stride=11.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5405
// Exercises m=5 for n = 16, 24 and k = 1, 6, 11, 16 with a_stride=23.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5422
// Exercises n = 16, 24, k = 1, 6, 11, 16 and every m = 1..5, with iterations(1).
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5441
// Exercises k = 1, 6, 11, 16 for every n = 1..8 and m = 1..5 with cm_stride=11 and iterations(1).
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5461
// Exercises m=5, n=8, k=4 with cm_stride=11.
TEST(F32_GEMM_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5474 #endif // XNN_ARCH_WASMRELAXEDSIMD
5475
5476
5477 #if XNN_ARCH_WASMRELAXEDSIMD
// Exercises m=6, n=8, k=4.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
5489
// Exercises m=6, n=8, k=4 with cn_stride=11.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
5502
// Exercises m=6, n=8, k=4 with a_stride=7.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
5515
// Exercises k=4 for every n = 1..8 and m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5532
// Exercises n=8, k=4 for every m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
5547
// Exercises m=6, k=4 for every n = 1..8, with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
5562
// Exercises m=6, n=8 for k = 1..3.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc <= 3; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
5576
// Exercises m=6, n=8 for k = 1..3 with a_stride=7.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t kc = 1; kc <= 3; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
5591
// Exercises k = 1..3 for every n = 1..8 and m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc <= 3; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5610
// Exercises m=6, n=8 for k = 5..7.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc <= 7; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
5624
// Exercises m=6, n=8 for k = 5..7 with a_stride=11.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t kc = 5; kc <= 7; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
5639
// Exercises k = 5..7 for every n = 1..8 and m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5658
// Exercises m=6, n=8 for k = 8, 12, ..., 40.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
5672
// Exercises m=6, n=8 for k = 8, 12, ..., 40 with a_stride=43.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
5687
// Exercises k = 8, 12, ..., 40 for every n = 1..8 and m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5706
// Exercises m=6 for n = 9..15 and k = 1, 6, 11, 16.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5722
// Exercises m=6 for n = 9..15 and k = 1, 6, 11, 16 with cn_stride=11.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5739
// Exercises m=6 for n = 9..15 and k = 1, 6, 11, 16 with a_stride=23.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5756
// Exercises n = 9..15, k = 1, 6, 11, 16 and every m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5775
// Exercises m=6 for n = 16, 24 and k = 1, 6, 11, 16.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5791
// Exercises m=6 for n = 16, 24 and k = 1, 6, 11, 16 with cn_stride=11.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5808
// Exercises m=6 for n = 16, 24 and k = 1, 6, 11, 16 with a_stride=23.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5825
// Exercises n = 16, 24, k = 1, 6, 11, 16 and every m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5844
// Exercises k = 1, 6, 11, 16 for every n = 1..8 and m = 1..6 with cm_stride=11 and iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5864
// Exercises m=6, n=8, k=4 with cm_stride=11.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
5877 #endif // XNN_ARCH_WASMRELAXEDSIMD
5878
5879
5880 #if XNN_ARCH_WASMRELAXEDSIMD
// Exercises m=6, n=8, k=4.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
5892
// Exercises m=6, n=8, k=4 with cn_stride=11.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
5905
// Exercises m=6, n=8, k=4 with a_stride=7.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
5918
// Exercises k=4 for every n = 1..8 and m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5935
// Exercises n=8, k=4 for every m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
5950
// Exercises m=6, k=4 for every n = 1..8, with iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
5965
// Exercises m=6, n=8 for k = 1..3.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kc = 1; kc <= 3; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
5979
// Exercises m=6, n=8 for k = 1..3 with a_stride=7.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t kc = 1; kc <= 3; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(kc)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
5994
// Exercises k = 1..3 for every n = 1..8 and m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kc = 1; kc <= 3; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6013
// Exercises m=6, n=8 for k = 5..7.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kc = 5; kc <= 7; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6027
// Exercises m=6, n=8 for k = 5..7 with a_stride=11.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t kc = 5; kc <= 7; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(kc)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6042
// Exercises k = 5..7 for every n = 1..8 and m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6061
// Exercises m=6, n=8 for k = 8, 12, ..., 40.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6075
// Exercises m=6, n=8 for k = 8, 12, ..., 40 with a_stride=43.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(kc)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6090
// Exercises k = 8, 12, ..., 40 for every n = 1..8 and m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6109
// Exercises m=6 for n = 9..15 and k = 1, 6, 11, 16.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6125
// Exercises m=6 for n = 9..15 and k = 1, 6, 11, 16 with cn_stride=11.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6142
// Exercises m=6 for n = 9..15 and k = 1, 6, 11, 16 with a_stride=23.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6159
// Exercises n = 9..15, k = 1, 6, 11, 16 and every m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6178
// Exercises m=6 for n = 16, 24 and k = 1, 6, 11, 16.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6194
// Exercises m=6 for n = 16, 24 and k = 1, 6, 11, 16 with cn_stride=11.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6211
// Exercises m=6 for n = 16, 24 and k = 1, 6, 11, 16 with a_stride=23.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6228
// Exercises n = 16, 24, k = 1, 6, 11, 16 and every m = 1..6, with iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6247
// Exercises k = 1, 6, 11, 16 for every n = 1..8 and m = 1..6 with cm_stride=11 and iterations(1).
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6267
// Exercises m=6, n=8, k=4 with cm_stride=11.
TEST(F32_GEMM_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6280 #endif // XNN_ARCH_WASMRELAXEDSIMD
6281
6282
// Exercises m=1, n=4, k=1.
TEST(F32_GEMM_1X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(xnn_f32_gemm_ukernel_1x4__scalar);
}
6294
// Exercises m=1, n=4, k=1 with cn_stride=7.
TEST(F32_GEMM_1X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemm_ukernel_1x4__scalar);
}
6307
// Exercises m=1, n=4, k=1 with a_stride=3.
TEST(F32_GEMM_1X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_1x4__scalar);
}
6320
// Exercises k=1 for every n = 1..4 and m = 1, with iterations(1).
TEST(F32_GEMM_1X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_1x4__scalar);
    }
  }
}
6337
// Exercises n=4, k=1 for m = 1 (single-pass loop), with iterations(1).
TEST(F32_GEMM_1X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(mc).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_1x4__scalar);
  }
}
6352
// Exercises m=1, k=1 for every n = 1..4, with iterations(1).
TEST(F32_GEMM_1X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_1x4__scalar);
  }
}
6367
// Exercises m=1, n=4 for k = 2..9.
TEST(F32_GEMM_1X4__SCALAR, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_f32_gemm_ukernel_1x4__scalar);
  }
}
6381
// Exercises m=1, n=4 for k = 2..9 with a_stride=11.
TEST(F32_GEMM_1X4__SCALAR, k_gt_1_strided_a) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_1x4__scalar);
  }
}
6396
// Exercises k = 2..9 for every n = 1..4 and m = 1, with iterations(1).
TEST(F32_GEMM_1X4__SCALAR, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x4__scalar);
      }
    }
  }
}
6415
// Exercises m=1 for n = 5..7 and k = 1, 3, 5.
TEST(F32_GEMM_1X4__SCALAR, n_gt_4) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_1x4__scalar);
    }
  }
}
6431
// Exercises m=1 for n = 5..7 and k = 1, 3, 5 with cn_stride=7.
TEST(F32_GEMM_1X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_f32_gemm_ukernel_1x4__scalar);
    }
  }
}
6448
// Exercises m=1 for n = 5..7 and k = 1, 3, 5 with a_stride=7.
TEST(F32_GEMM_1X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_1x4__scalar);
    }
  }
}
6465
// Exercises n = 5..7, k = 1, 3, 5 and m = 1, with iterations(1).
TEST(F32_GEMM_1X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x4__scalar);
      }
    }
  }
}
6484
// Exercises m=1 for n = 8, 12 and k = 1, 3, 5.
TEST(F32_GEMM_1X4__SCALAR, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_1x4__scalar);
    }
  }
}
6500
// Exercises m=1 for n = 8, 12 and k = 1, 3, 5 with cn_stride=7.
TEST(F32_GEMM_1X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_f32_gemm_ukernel_1x4__scalar);
    }
  }
}
6517
// Exercises m=1 for n = 8, 12 and k = 1, 3, 5 with a_stride=7.
TEST(F32_GEMM_1X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_1x4__scalar);
    }
  }
}
6534
// Exercises n = 8, 12, k = 1, 3, 5 and m = 1, with iterations(1).
TEST(F32_GEMM_1X4__SCALAR, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x4__scalar);
      }
    }
  }
}
6553
// Exercises k = 1, 3, 5 for every n = 1..4 and m = 1 with cm_stride=7 and iterations(1).
TEST(F32_GEMM_1X4__SCALAR, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x4__scalar);
      }
    }
  }
}
6573
// Exercises m=1, n=4, k=1 with cm_stride=7.
TEST(F32_GEMM_1X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemm_ukernel_1x4__scalar);
}
6586
6587
// Exercises m=4, n=2, k=1.
TEST(F32_GEMM_4X2__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .Test(xnn_f32_gemm_ukernel_4x2__scalar);
}
6599
// Exercises m=4, n=2, k=1 with cn_stride=5.
TEST(F32_GEMM_4X2__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_f32_gemm_ukernel_4x2__scalar);
}
6612
// Exercises m=4, n=2, k=1 with a_stride=3.
TEST(F32_GEMM_4X2__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_4x2__scalar);
}
6625
// Exercises k=1 for every n = 1..2 and m = 1..4, with iterations(1).
TEST(F32_GEMM_4X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x2__scalar);
    }
  }
}
6642
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with n = 2, k = 1
// for every m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X2__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    // A fresh tester is configured for every m.
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(m_dim).n(2).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
  }
}
6657
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with m = 4, k = 1
// for every n in [1, 2], with iterations(1) each.
TEST(F32_GEMM_4X2__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    // A fresh tester is configured for every n.
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(n_dim).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
  }
}
6672
// Runs xnn_f32_gemm_ukernel_4x2__scalar with m = mr = 4, n = nr = 2 for
// every k in [2, 9].
TEST(F32_GEMM_4X2__SCALAR, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    // A fresh tester is configured for every k.
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_dim);
    tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
  }
}
6686
// Runs xnn_f32_gemm_ukernel_4x2__scalar with m = mr = 4, n = nr = 2 and
// a_stride(11) for every k in [2, 9].
TEST(F32_GEMM_4X2__SCALAR, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    // A fresh tester is configured for every k.
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_dim);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
  }
}
6701
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) for every k in
// [2, 9], n in [1, 2] and m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X2__SCALAR, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        // A fresh tester is configured for every (k, n, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
      }
    }
  }
}
6720
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with m = 4 for
// n in [3, 4) -- i.e. only n = 3 -- and k in {1, 3, 5}.
TEST(F32_GEMM_4X2__SCALAR, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
    }
  }
}
6736
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with m = 4 and
// cn_stride(5) for n in [3, 4) -- i.e. only n = 3 -- and k in {1, 3, 5}.
TEST(F32_GEMM_4X2__SCALAR, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(5);
      tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
    }
  }
}
6753
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with m = 4 and
// a_stride(7) for n in [3, 4) -- i.e. only n = 3 -- and k in {1, 3, 5}.
TEST(F32_GEMM_4X2__SCALAR, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
    }
  }
}
6770
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) for n in [3, 4)
// -- i.e. only n = 3 --, k in {1, 3, 5} and m in [1, 4], with iterations(1).
TEST(F32_GEMM_4X2__SCALAR, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        // A fresh tester is configured for every (n, k, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
      }
    }
  }
}
6789
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with m = 4 for
// n in {4, 6} and k in {1, 3, 5}.
TEST(F32_GEMM_4X2__SCALAR, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
    }
  }
}
6805
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with m = 4 and
// cn_stride(5) for n in {4, 6} and k in {1, 3, 5}.
TEST(F32_GEMM_4X2__SCALAR, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(5);
      tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
    }
  }
}
6822
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with m = 4 and
// a_stride(7) for n in {4, 6} and k in {1, 3, 5}.
TEST(F32_GEMM_4X2__SCALAR, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
    }
  }
}
6839
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) for n in {4, 6},
// k in {1, 3, 5} and m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X2__SCALAR, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        // A fresh tester is configured for every (n, k, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
      }
    }
  }
}
6858
// Runs xnn_f32_gemm_ukernel_4x2__scalar (mr = 4, nr = 2) with cm_stride(5)
// for k in {1, 3, 5}, n in [1, 2] and m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X2__SCALAR, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        // A fresh tester is configured for every (k, n, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(5);
        tester.iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
      }
    }
  }
}
6878
// Runs xnn_f32_gemm_ukernel_4x2__scalar once with m = mr = 4, n = nr = 2,
// k = 1 and cm_stride(5).
TEST(F32_GEMM_4X2__SCALAR, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(xnn_f32_gemm_ukernel_4x2__scalar);
}
6891
6892
// Runs xnn_f32_gemm_ukernel_4x4__scalar once with m = mr = 4, n = nr = 4
// and k = 1.
TEST(F32_GEMM_4X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
}
6904
// Runs xnn_f32_gemm_ukernel_4x4__scalar once with m = mr = 4, n = nr = 4,
// k = 1 and cn_stride(7).
TEST(F32_GEMM_4X4__SCALAR, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
}
6917
// Runs xnn_f32_gemm_ukernel_4x4__scalar once with m = mr = 4, n = nr = 4,
// k = 1 and a_stride(3).
TEST(F32_GEMM_4X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.a_stride(3);
  tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
}
6930
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with k = 1 for every
// n in [1, 4] and m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      // A fresh tester is configured for every (n, m) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
    }
  }
}
6947
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with n = 4, k = 1
// for every m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    // A fresh tester is configured for every m.
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(m_dim).n(4).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
  }
}
6962
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with m = 4, k = 1
// for every n in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    // A fresh tester is configured for every n.
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(n_dim).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
  }
}
6977
// Runs xnn_f32_gemm_ukernel_4x4__scalar with m = mr = 4, n = nr = 4 for
// every k in [2, 9].
TEST(F32_GEMM_4X4__SCALAR, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    // A fresh tester is configured for every k.
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
  }
}
6991
// Runs xnn_f32_gemm_ukernel_4x4__scalar with m = mr = 4, n = nr = 4 and
// a_stride(11) for every k in [2, 9].
TEST(F32_GEMM_4X4__SCALAR, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    // A fresh tester is configured for every k.
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
  }
}
7006
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) for every k in
// [2, 9], n in [1, 4] and m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        // A fresh tester is configured for every (k, n, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
      }
    }
  }
}
7025
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with m = 4 for
// n in [5, 7] and k in {1, 3, 5}.
TEST(F32_GEMM_4X4__SCALAR, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
    }
  }
}
7041
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with m = 4 and
// cn_stride(7) for n in [5, 7] and k in {1, 3, 5}.
TEST(F32_GEMM_4X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
    }
  }
}
7058
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with m = 4 and
// a_stride(7) for n in [5, 7] and k in {1, 3, 5}.
TEST(F32_GEMM_4X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
    }
  }
}
7075
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) for n in [5, 7],
// k in {1, 3, 5} and m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        // A fresh tester is configured for every (n, k, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
      }
    }
  }
}
7094
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with m = 4 for
// n in {8, 12} and k in {1, 3, 5}.
TEST(F32_GEMM_4X4__SCALAR, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
    }
  }
}
7110
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with m = 4 and
// cn_stride(7) for n in {8, 12} and k in {1, 3, 5}.
TEST(F32_GEMM_4X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
    }
  }
}
7127
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with m = 4 and
// a_stride(7) for n in {8, 12} and k in {1, 3, 5}.
TEST(F32_GEMM_4X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      // A fresh tester is configured for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
    }
  }
}
7144
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) for n in {8, 12},
// k in {1, 3, 5} and m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        // A fresh tester is configured for every (n, k, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
      }
    }
  }
}
7163
// Runs xnn_f32_gemm_ukernel_4x4__scalar (mr = 4, nr = 4) with cm_stride(7)
// for k in {1, 3, 5}, n in [1, 4] and m in [1, 4], with iterations(1) each.
TEST(F32_GEMM_4X4__SCALAR, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        // A fresh tester is configured for every (k, n, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
      }
    }
  }
}
7183
// Runs xnn_f32_gemm_ukernel_4x4__scalar once with m = mr = 4, n = nr = 4,
// k = 1 and cm_stride(7).
TEST(F32_GEMM_4X4__SCALAR, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(xnn_f32_gemm_ukernel_4x4__scalar);
}
7196