1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/f32-gemm-relu.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = mr, n = nr and k = 1.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
}
39
// Single run with m = mr, n = nr, k = 1 and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
}
52
// Single run with m = mr, n = nr, k = 1 and a_stride set to 3.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
}
65
// Sweeps every (m, n) with 1 <= m <= 3 and 1 <= n <= 8 at k = 1,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
82
// Sweeps m over 1..3 with n = nr and k = 1, iterations set to 1.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
97
// Sweeps n over 1..8 with m = mr and k = 1, iterations set to 1.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
112
// Sweeps k over 2..9 with m = mr and n = nr.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
126
// Sweeps k over 2..9 with m = mr, n = nr and a_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
141
// Sweeps k over 2..9 and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
160
// Sweeps n over 9..15 (above nr) and k over {1, 3, 5} with m = mr.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
176
// Sweeps n over 9..15 and k over {1, 3, 5} with m = mr and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
193
// Sweeps n over 9..15 and k over {1, 3, 5} with m = mr and a_stride set to 7.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
210
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..3,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
229
// Runs n in {16, 24} (multiples of nr) and k over {1, 3, 5} with m = mr.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
245
// Runs n in {16, 24} and k over {1, 3, 5} with m = mr and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
262
// Runs n in {16, 24} and k over {1, 3, 5} with m = mr and a_stride set to 7.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
279
// Runs n in {16, 24}, k over {1, 3, 5} and m over 1..3,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
298
// Sweeps k over {1, 3, 5} and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
318
// Single run with m = mr, n = nr, k = 1 and cm_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_loadsplat);
}
331 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
332
333
334 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = mr, n = nr and k = 4.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
}
346
// Single run with m = mr, n = nr, k = 4 and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
}
359
// Single run with m = mr, n = nr, k = 4 and a_stride set to 7.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
}
372
// Sweeps every (m, n) with 1 <= m <= 3 and 1 <= n <= 8 at k = 4,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
389
// Sweeps m over 1..3 with n = nr and k = 4, iterations set to 1.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
404
// Sweeps n over 1..8 with m = mr and k = 4, iterations set to 1.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
419
// Sweeps k over 1..3 with m = mr and n = nr.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
433
// Sweeps k over 1..3 with m = mr, n = nr and a_stride set to 7.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
448
// Sweeps k over 1..3 and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
467
// Sweeps k over 5..7 with m = mr and n = nr.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
481
// Sweeps k over 5..7 with m = mr, n = nr and a_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
496
// Sweeps k over 5..7 and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
515
// Sweeps k over 8, 12, ..., 40 (multiples of 4) with m = mr and n = nr.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
529
// Sweeps k over 8, 12, ..., 40 with m = mr, n = nr and a_stride set to 43.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
544
// Sweeps k over 8, 12, ..., 40 and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
563
// Sweeps n over 9..15 (above nr) and k over {1, 6, 11, 16} with m = mr.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
579
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = mr and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
596
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = mr and a_stride set to 23.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
613
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..3,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
632
// Runs n in {16, 24} (multiples of nr) and k over {1, 6, 11, 16} with m = mr.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
648
// Runs n in {16, 24} and k over {1, 6, 11, 16} with m = mr and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
665
// Runs n in {16, 24} and k over {1, 6, 11, 16} with m = mr and a_stride set to 23.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
682
// Runs n in {16, 24}, k over {1, 6, 11, 16} and m over 1..3,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
701
// Sweeps k over {1, 6, 11, 16} and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
721
// Single run with m = mr, n = nr, k = 4 and cm_stride set to 11.
TEST(F32_GEMM_RELU_3X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmsimd_splat);
}
734 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
735
736
737 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = mr, n = nr and k = 4.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
}
749
// Single run with m = mr, n = nr, k = 4 and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
}
762
// Single run with m = mr, n = nr, k = 4 and a_stride set to 7.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
}
775
// Sweeps every (m, n) with 1 <= m <= 3 and 1 <= n <= 8 at k = 4,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
792
// Sweeps m over 1..3 with n = nr and k = 4, iterations set to 1.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
807
// Sweeps n over 1..8 with m = mr and k = 4, iterations set to 1.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
822
// Sweeps k over 1..3 with m = mr and n = nr.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
836
// Sweeps k over 1..3 with m = mr, n = nr and a_stride set to 7.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_lt_4_strided_a) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
851
// Sweeps k over 1..3 and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
870
// Sweeps k over 5..7 with m = mr and n = nr.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
884
// Sweeps k over 5..7 with m = mr, n = nr and a_stride set to 11.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
899
// Sweeps k over 5..7 and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
918
// Sweeps k over 8, 12, ..., 40 (multiples of 4) with m = mr and n = nr.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
932
// Sweeps k over 8, 12, ..., 40 with m = mr, n = nr and a_stride set to 43.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_div_4_strided_a) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
947
// Sweeps k over 8, 12, ..., 40 and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
966
// Sweeps n over 9..15 (above nr) and k over {1, 6, 11, 16} with m = mr.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
982
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = mr and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
999
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = mr and a_stride set to 23.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1016
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..3,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1035
// Runs n in {16, 24} (multiples of nr) and k over {1, 6, 11, 16} with m = mr.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1051
// Runs n in {16, 24} and k over {1, 6, 11, 16} with m = mr and cn_stride set to 11.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1068
// Runs n in {16, 24} and k over {1, 6, 11, 16} with m = mr and a_stride set to 23.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1085
// Runs n in {16, 24}, k over {1, 6, 11, 16} and m over 1..3,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1104
// Sweeps k over {1, 6, 11, 16} and every (m, n) with 1 <= m <= 3, 1 <= n <= 8,
// with cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1124
// Single run with m = mr, n = nr, k = 4 and cm_stride set to 11.
TEST(F32_GEMM_RELU_3X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmsimd);
}
1137 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1138
1139
1140 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = mr, n = nr and k = 4.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
}
1152
// Single run with m = mr, n = nr, k = 4 and cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
}
1165
// Single run with m = mr, n = nr, k = 4 and a_stride set to 7.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
}
1178
// Sweeps every (m, n) with 1 <= m <= 4 and 1 <= n <= 2 at k = 4,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1195
// Sweeps m over 1..4 with n = nr and k = 4, iterations set to 1.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(m).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1210
// Sweeps n over 1..2 with m = mr and k = 4, iterations set to 1.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1225
// Sweeps k over 1..3 with m = mr and n = nr.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1239
// Sweeps k over 1..3 with m = mr, n = nr and a_stride set to 7.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4_strided_a) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1254
// Sweeps k over 1..3 and every (m, n) with 1 <= m <= 4, 1 <= n <= 2,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1273
// Sweeps k over 5..7 with m = mr and n = nr.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1287
// Sweeps k over 5..7 with m = mr, n = nr and a_stride set to 11.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1302
// Sweeps k over 5..7 and every (m, n) with 1 <= m <= 4, 1 <= n <= 2,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1321
// Sweeps k over 8, 12, ..., 40 (multiples of 4) with m = mr and n = nr.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1335
// Sweeps k over 8, 12, ..., 40 with m = mr, n = nr and a_stride set to 43.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4_strided_a) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1350
// Sweeps k over 8, 12, ..., 40 and every (m, n) with 1 <= m <= 4, 1 <= n <= 2,
// with iterations set to 1 for each configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1369
// n = 3 (the only value in [3, 4)) crossed with k in 1..20 step 5, at m=4.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(ukernel);
    }
  }
}
1385
// n = 3 crossed with k in 1..20 step 5, at m=4, with cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(5)
          .Test(ukernel);
    }
  }
}
1402
// n = 3 crossed with k in 1..20 step 5, at m=4, with a_stride set to 23.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(ukernel);
    }
  }
}
1419
// n = 3 crossed with k in 1..20 step 5 and m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
1438
// n in {4, 6} crossed with k in 1..20 step 5, at m=4.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(ukernel);
    }
  }
}
1454
// n in {4, 6} crossed with k in 1..20 step 5, at m=4, with cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(5)
          .Test(ukernel);
    }
  }
}
1471
// n in {4, 6} crossed with k in 1..20 step 5, at m=4, with a_stride set to 23.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(ukernel);
    }
  }
}
1488
// n in {4, 6} crossed with k in 1..20 step 5 and m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
1507
// Every (k, n, m) with k in 1..20 step 5, n in 1..2, m in 1..4; cm_stride 5, iterations 1.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cm_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(5)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
1527
// Single run at m=4, n=2, k=4 with cm_stride set to 5.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cm) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4)
      .cm_stride(5)
      .Test(ukernel);
}
1540 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1541
1542
1543 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=4, n=8, k=1; tester configured mr=4, nr=8, kr=1, sr=1.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(ukernel);
}
1555
// Single run at m=4, n=8, k=1 with cn_stride set to 11.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(ukernel);
}
1568
// Single run at m=4, n=8, k=1 with a_stride set to 3.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(ukernel);
}
1581
// Every (n, m) with n in 1..8 and m in 1..4 at k=1; iterations set to 1.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1598
// Sweeps m over 1..4 at n=8, k=1; iterations set to 1.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(1)
        .iterations(1)
        .Test(ukernel);
  }
}
1613
// Sweeps n over 1..8 at m=4, k=1; iterations set to 1.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(1)
        .iterations(1)
        .Test(ukernel);
  }
}
1628
// Sweeps k over 2..9 with m=4, n=8.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(ukernel);
  }
}
1642
// Sweeps k over 2..9 with m=4, n=8 and a_stride set to 11.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .a_stride(11)
        .Test(ukernel);
  }
}
1657
// Every (k, n, m) with k in 2..9, n in 1..8, m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
1676
// n in 9..15 crossed with k in {1, 3, 5}, at m=4.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(ukernel);
    }
  }
}
1692
// n in 9..15 crossed with k in {1, 3, 5}, at m=4, with cn_stride set to 11.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(ukernel);
    }
  }
}
1709
// n in 9..15 crossed with k in {1, 3, 5}, at m=4, with a_stride set to 7.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(ukernel);
    }
  }
}
1726
// n in 9..15 crossed with k in {1, 3, 5} and m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
1745
// n in {16, 24} crossed with k in {1, 3, 5}, at m=4.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_div_8) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(ukernel);
    }
  }
}
1761
// n in {16, 24} crossed with k in {1, 3, 5}, at m=4, with cn_stride set to 11.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(ukernel);
    }
  }
}
1778
// n in {16, 24} crossed with k in {1, 3, 5}, at m=4, with a_stride set to 7.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(ukernel);
    }
  }
}
1795
// n in {16, 24} crossed with k in {1, 3, 5} and m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
1814
// Every (k, n, m) with k in {1, 3, 5}, n in 1..8, m in 1..4; cm_stride 11, iterations 1.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
1834
// Single run at m=4, n=8, k=1 with cm_stride set to 11.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_LOADSPLAT, strided_cm) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_loadsplat;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(ukernel);
}
1847 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1848
1849
1850 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=4, n=8, k=4; tester configured mr=4, nr=8, kr=1, sr=4.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_eq_4) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(ukernel);
}
1862
// Single run at m=4, n=8, k=4 with cn_stride set to 11.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(ukernel);
}
1875
// Single run at m=4, n=8, k=4 with a_stride set to 7.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_eq_4_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(ukernel);
}
1888
// Every (n, m) with n in 1..8 and m in 1..4 at k=4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_eq_4_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1905
// Sweeps m over 1..4 at n=8, k=4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_eq_4_subtile_m) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1920
// Sweeps n over 1..8 at m=4, k=4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_eq_4_subtile_n) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1935
// Sweeps k over 1, 2, 3 with m=4, n=8.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_lt_4) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(ukernel);
  }
}
1949
// Sweeps k over 1, 2, 3 with m=4, n=8 and a_stride set to 7.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_lt_4_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .a_stride(7)
        .Test(ukernel);
  }
}
1964
// Every (k, n, m) with k in 1..3, n in 1..8, m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_lt_4_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
1983
// Sweeps k over 5, 6, 7 with m=4, n=8.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_gt_4) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(ukernel);
  }
}
1997
// Sweeps k over 5, 6, 7 with m=4, n=8 and a_stride set to 11.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_gt_4_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .a_stride(11)
        .Test(ukernel);
  }
}
2012
// Every (k, n, m) with k in 5..7, n in 1..8, m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_gt_4_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2031
// Sweeps k over 8, 12, ..., 40 (step 4) with m=4, n=8.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_div_4) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(ukernel);
  }
}
2045
// Sweeps k over 8, 12, ..., 40 (step 4) with m=4, n=8 and a_stride set to 43.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_div_4_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .a_stride(43)
        .Test(ukernel);
  }
}
2060
// Every (k, n, m) with k in 8..40 step 4, n in 1..8, m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, k_div_4_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2079
// n in 9..15 crossed with k in 1..20 step 5, at m=4.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, n_gt_8) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(ukernel);
    }
  }
}
2095
// n in 9..15 crossed with k in 1..20 step 5, at m=4, with cn_stride set to 11.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, n_gt_8_strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(ukernel);
    }
  }
}
2112
// n in 9..15 crossed with k in 1..20 step 5, at m=4, with a_stride set to 23.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, n_gt_8_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(ukernel);
    }
  }
}
2129
// n in 9..15 crossed with k in 1..20 step 5 and m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, n_gt_8_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2148
// n in {16, 24} crossed with k in 1..20 step 5, at m=4.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, n_div_8) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(ukernel);
    }
  }
}
2164
// n in {16, 24} crossed with k in 1..20 step 5, at m=4, with cn_stride set to 11.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, n_div_8_strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(ukernel);
    }
  }
}
2181
// n in {16, 24} crossed with k in 1..20 step 5, at m=4, with a_stride set to 23.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, n_div_8_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .a_stride(23)
          .Test(ukernel);
    }
  }
}
2198
// n in {16, 24} crossed with k in 1..20 step 5 and m in 1..4; iterations set to 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, n_div_8_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2217
// Every (k, n, m) with k in 1..20 step 5, n in 1..8, m in 1..4; cm_stride 11, iterations 1.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, strided_cm_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2237
// Single run at m=4, n=8, k=4 with cm_stride set to 11.
TEST(F32_GEMM_RELU_4X8S4__WASMSIMD, strided_cm) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_4x8s4__wasmsimd;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(ukernel);
}
2250 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2251
2252
2253 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=5, n=8, k=1; tester configured mr=5, nr=8, kr=1, sr=1.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(ukernel);
}
2265
// Single run at m=5, n=8, k=1 with cn_stride set to 11.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(ukernel);
}
2278
// Single run at m=5, n=8, k=1 with a_stride set to 3.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .a_stride(3)
      .Test(ukernel);
}
2291
// Every (n, m) with n in 1..8 and m in 1..5 at k=1; iterations set to 1.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
2308
// Sweeps m over 1..5 at n=8, k=1; iterations set to 1.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(1)
        .iterations(1)
        .Test(ukernel);
  }
}
2323
// Sweeps n over 1..8 at m=5, k=1; iterations set to 1.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cur_n).k(1)
        .iterations(1)
        .Test(ukernel);
  }
}
2338
// Sweeps k over 2..9 with m=5, n=8.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(cur_k)
        .Test(ukernel);
  }
}
2352
// Sweeps k over 2..9 with m=5, n=8 and a_stride set to 11.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(cur_k)
        .a_stride(11)
        .Test(ukernel);
  }
}
2367
// Every (k, n, m) with k in 2..9, n in 1..8, m in 1..5; iterations set to 1.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2386
// n in 9..15 crossed with k in {1, 3, 5}, at m=5.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .Test(ukernel);
    }
  }
}
2402
// n in 9..15 crossed with k in {1, 3, 5}, at m=5, with cn_stride set to 11.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(ukernel);
    }
  }
}
2419
// n in 9..15 crossed with k in {1, 3, 5}, at m=5, with a_stride set to 7.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(ukernel);
    }
  }
}
2436
// n in 9..15 crossed with k in {1, 3, 5} and m in 1..5; iterations set to 1.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2455
// n in {16, 24} crossed with k in {1, 3, 5}, at m=5.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_div_8) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .Test(ukernel);
    }
  }
}
2471
// n in {16, 24} crossed with k in {1, 3, 5}, at m=5, with cn_stride set to 11.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(ukernel);
    }
  }
}
2488
// n in {16, 24} crossed with k in {1, 3, 5}, at m=5, with a_stride set to 7.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(ukernel);
    }
  }
}
2505
// n in {16, 24} crossed with k in {1, 3, 5} and m in 1..5; iterations set to 1.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2524
// Every (k, n, m) with k in {1, 3, 5}, n in 1..8, m in 1..5; cm_stride 11, iterations 1.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(ukernel);
      }
    }
  }
}
2544
// Single run at m=5, n=8, k=1 with cm_stride set to 11.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_LOADSPLAT, strided_cm) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(ukernel);
}
2557 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2558
2559
2560 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=5, n=8, k=4; tester configured mr=5, nr=8, kr=1, sr=4.
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_eq_4) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .Test(ukernel);
}
2572
// Single run at m=5, n=8, k=4 with cn_stride set to 11.
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, strided_cn) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(ukernel);
}
2585
// Single run at m=5, n=8, k=4 with a_stride set to 7.
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_eq_4_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .a_stride(7)
      .Test(ukernel);
}
2598
// Every (n, m) with n in 1..8 and m in 1..5 at k=4; iterations set to 1.
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_eq_4_subtile) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
2615
// Sweeps m over 1..5 at n=8, k=4; iterations set to 1.
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_eq_4_subtile_m) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd;
  for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(ukernel);
  }
}
2630
// Sweeps n over 1..8 at m=5, k=4; iterations set to 1.
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_eq_4_subtile_n) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(cur_n).k(4)
        .iterations(1)
        .Test(ukernel);
  }
}
2645
// Sweeps k over 1, 2, 3 with m=5, n=8.
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_lt_4) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(cur_k)
        .Test(ukernel);
  }
}
2659
// Sweeps k over 1, 2, 3 with m=5, n=8 and a_stride set to 7.
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_lt_4_strided_a) {
  const auto ukernel = xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(cur_k)
        .a_stride(7)
        .Test(ukernel);
  }
}
2674
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_lt_4_subtile) {
  // Sweeps k in [1, 4), n in [1, 8] and m in [1, 5], one iteration each.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2693
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_gt_4) {
  // Sweeps k in [5, 8) with m=5 and n=8.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2707
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_gt_4_strided_a) {
  // Sweeps k in [5, 8) with m=5, n=8 and a_stride(11).
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2722
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_gt_4_subtile) {
  // Sweeps k in [5, 8), n in [1, 8] and m in [1, 5], one iteration each.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2741
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_div_4) {
  // Sweeps k over 8, 12, ..., 40 with m=5 and n=8.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2755
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_div_4_strided_a) {
  // Sweeps k over 8, 12, ..., 40 with m=5, n=8 and a_stride(43).
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2770
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, k_div_4_subtile) {
  // Sweeps k over 8, 12, ..., 40, n in [1, 8] and m in [1, 5], one iteration each.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2789
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, n_gt_8) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=5.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2805
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, n_gt_8_strided_cn) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=5 and cn_stride(11).
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2822
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, n_gt_8_strided_a) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=5 and a_stride(23).
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2839
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, n_gt_8_subtile) {
  // Sweeps n in [9, 16), k over 1, 6, 11, 16 and m in [1, 5], one iteration each.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2858
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, n_div_8) {
  // Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=5.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2874
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, n_div_8_strided_cn) {
  // Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=5 and cn_stride(11).
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2891
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, n_div_8_strided_a) {
  // Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=5 and a_stride(23).
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2908
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, n_div_8_subtile) {
  // Sweeps n over 16, 24, k over 1, 6, 11, 16 and m in [1, 5], one iteration each.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2927
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, strided_cm_subtile) {
  // Sweeps k over 1, 6, 11, 16, n in [1, 8] and m in [1, 5] with cm_stride(11),
  // one iteration each.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2947
TEST(F32_GEMM_RELU_5X8S4__WASMSIMD, strided_cm) {
  // Single run: m=5, n=8, k=4, with cm_stride(11).
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd);
}
2960 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2961
2962
2963 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4) {
  // Single run: m=6, n=8, k=4.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
}
2975
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, strided_cn) {
  // Single run: m=6, n=8, k=4, with cn_stride(11).
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
}
2988
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  // Single run: m=6, n=8, k=4, with a_stride(7).
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
}
3001
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  // Sweeps n in [1, 8] and m in [1, 6] at k=4, one iteration per combination.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3018
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  // Sweeps m in [1, 6] with n=8 and k=4, one iteration per value.
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3033
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  // Sweeps n in [1, 8] with m=6 and k=4, one iteration per value.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3048
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_lt_4) {
  // Sweeps k in [1, 4) with m=6 and n=8.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3062
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  // Sweeps k in [1, 4) with m=6, n=8 and a_stride(7).
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3077
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  // Sweeps k in [1, 4), n in [1, 8] and m in [1, 6], one iteration each.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3096
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_gt_4) {
  // Sweeps k in [5, 8) with m=6 and n=8.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3110
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  // Sweeps k in [5, 8) with m=6, n=8 and a_stride(11).
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3125
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  // Sweeps k in [5, 8), n in [1, 8] and m in [1, 6], one iteration each.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3144
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_div_4) {
  // Sweeps k over 8, 12, ..., 40 with m=6 and n=8.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3158
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  // Sweeps k over 8, 12, ..., 40 with m=6, n=8 and a_stride(43).
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3173
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  // Sweeps k over 8, 12, ..., 40, n in [1, 8] and m in [1, 6], one iteration each.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3192
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, n_gt_8) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=6.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3208
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=6 and cn_stride(11).
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3225
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=6 and a_stride(23).
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3242
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  // Sweeps n in [9, 16), k over 1, 6, 11, 16 and m in [1, 6], one iteration each.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3261
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, n_div_8) {
  // Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=6.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3277
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  // Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=6 and cn_stride(11).
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3294
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  // Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=6 and a_stride(23).
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3311
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  // Sweeps n over 16, 24, k over 1, 6, 11, 16 and m in [1, 6], one iteration each.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3330
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  // Sweeps k over 1, 6, 11, 16, n in [1, 8] and m in [1, 6] with cm_stride(11),
  // one iteration each.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3350
TEST(F32_GEMM_RELU_6X8__WASMSIMD_SPLAT, strided_cm) {
  // Single run: m=6, n=8, k=4, with cm_stride(11).
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_splat);
}
3363 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3364
3365
3366 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  // Single run: m=1, n=8, k=1.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3378
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  // Single run: m=1, n=8, k=1, with cn_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3391
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  // Single run: m=1, n=8, k=1, with a_stride(3).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3404
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  // Sweeps n in [1, 8] and m in [1, 1] at k=1, one iteration per combination.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3421
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  // Sweeps m in [1, 1] with n=8 and k=1, one iteration per value.
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3436
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  // Sweeps n in [1, 8] with m=1 and k=1, one iteration per value.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3451
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  // Sweeps k in [2, 10) with m=1 and n=8.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3465
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  // Sweeps k in [2, 10) with m=1, n=8 and a_stride(11).
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3480
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  // Sweeps k in [2, 10), n in [1, 8] and m in [1, 1], one iteration each.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3499
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  // Sweeps n in [9, 16) and k over 1, 3, 5 with m=1.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3515
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  // Sweeps n in [9, 16) and k over 1, 3, 5 with m=1 and cn_stride(11).
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3532
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  // Sweeps n in [9, 16) and k over 1, 3, 5 with m=1 and a_stride(7).
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3549
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  // Sweeps n in [9, 16), k over 1, 3, 5 and m in [1, 1], one iteration each.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3568
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  // Sweeps n over 16, 24 and k over 1, 3, 5 with m=1.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3584
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  // Sweeps n over 16, 24 and k over 1, 3, 5 with m=1 and cn_stride(11).
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3601
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  // Sweeps n over 16, 24 and k over 1, 3, 5 with m=1 and a_stride(7).
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3618
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  // Sweeps n over 16, 24, k over 1, 3, 5 and m in [1, 1], one iteration each.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3637
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  // Sweeps k over 1, 3, 5, n in [1, 8] and m in [1, 1] with cm_stride(11),
  // one iteration each.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3657
TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  // Single run: m=1, n=8, k=1, with cm_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3670 #endif // XNN_ARCH_WASMRELAXEDSIMD
3671
3672
3673 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  // Single run: m=1, n=8, k=4.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3685
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  // Single run: m=1, n=8, k=4, with cn_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3698
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  // Single run: m=1, n=8, k=4, with a_stride(7).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3711
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  // Sweeps n in [1, 8] and m in [1, 1] at k=4, one iteration per combination.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3728
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  // Sweeps m in [1, 1] with n=8 and k=4, one iteration per value.
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3743
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  // Sweeps n in [1, 8] with m=1 and k=4, one iteration per value.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3758
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  // Sweeps k in [1, 4) with m=1 and n=8.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3772
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  // Sweeps k in [1, 4) with m=1, n=8 and a_stride(7).
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3787
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  // Sweeps k in [1, 4), n in [1, 8] and m in [1, 1], one iteration each.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3806
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  // Sweeps k in [5, 8) with m=1 and n=8.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3820
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  // Sweeps k in [5, 8) with m=1, n=8 and a_stride(11).
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3835
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  // Sweeps k in [5, 8), n in [1, 8] and m in [1, 1], one iteration each.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3854
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  // Sweeps k over 8, 12, ..., 40 with m=1 and n=8.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3868
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  // Sweeps k over 8, 12, ..., 40 with m=1, n=8 and a_stride(43).
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3883
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  // Sweeps k over 8, 12, ..., 40, n in [1, 8] and m in [1, 1], one iteration each.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3902
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=1.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3918
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=1 and cn_stride(11).
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3935
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  // Sweeps n in [9, 16) and k over 1, 6, 11, 16 with m=1 and a_stride(23).
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3952
// n = 9..15, k in {1, 6, 11, 16}, m = 1; one iteration per case.
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3971
// n in {16, 24} (multiples of nr = 8) with m = 1, for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3987
// n in {16, 24} with m = 1 and cn_stride(11), for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4004
// n in {16, 24} with m = 1 and a_stride(23), for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4021
// n in {16, 24}, k in {1, 6, 11, 16}, m = 1; one iteration per case.
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4040
// cm_stride(11) over k in {1, 6, 11, 16}, n = 1..8, m = 1; one iteration per case.
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4060
// Full 1x8 tile, k = 4, with cm_stride(11).
TEST(F32_GEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
4073 #endif // XNN_ARCH_WASMRELAXEDSIMD
4074
4075
4076 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 3x8 tile with k = 1.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
4088
// Full 3x8 tile, k = 1, with cn_stride(11).
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
4101
// Full 3x8 tile, k = 1, with a_stride(3).
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
4114
// Every (m, n) with m = 1..3 and n = 1..8 at k = 1; one iteration per case.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4131
// m = 1..3 with n = 8 and k = 1; one iteration per case.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4146
// n = 1..8 with m = 3 and k = 1; one iteration per case.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4161
// Full 3x8 tile swept over k = 2..9.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4175
// Full 3x8 tile with a_stride(11), swept over k = 2..9.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4190
// Every (m, n) with m = 1..3 and n = 1..8, for k = 2..9; one iteration per case.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4209
// n = 9..15 (larger than nr = 8) with m = 3, for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4225
// n = 9..15 with m = 3 and cn_stride(11), for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4242
// n = 9..15 with m = 3 and a_stride(7), for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4259
// n = 9..15, k in {1, 3, 5}, m = 1..3; one iteration per case.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4278
// n in {16, 24} (multiples of nr = 8) with m = 3, for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4294
// n in {16, 24} with m = 3 and cn_stride(11), for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4311
// n in {16, 24} with m = 3 and a_stride(7), for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4328
// n in {16, 24}, k in {1, 3, 5}, m = 1..3; one iteration per case.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4347
// cm_stride(11) over k in {1, 3, 5}, n = 1..8, m = 1..3; one iteration per case.
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4367
// Full 3x8 tile, k = 1, with cm_stride(11).
TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
4380 #endif // XNN_ARCH_WASMRELAXEDSIMD
4381
4382
4383 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 4x2 tile with k = 4.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4395
// Full 4x2 tile, k = 4, with cn_stride(5).
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4408
// Full 4x2 tile, k = 4, with a_stride(7).
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4421
// Every (m, n) with m = 1..4 and n = 1..2 at k = 4; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4438
// m = 1..4 with n = 2 and k = 4; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(m).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4453
// n = 1..2 with m = 4 and k = 4; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4468
// Full 4x2 tile swept over k = 1..3.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4482
// Full 4x2 tile with a_stride(7), swept over k = 1..3.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4497
// Every (m, n) with m = 1..4 and n = 1..2, for k = 1..3; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4516
// Full 4x2 tile swept over k = 5..7.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4530
// Full 4x2 tile with a_stride(11), swept over k = 5..7.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4545
// Every (m, n) with m = 1..4 and n = 1..2, for k = 5..7; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4564
// Full 4x2 tile swept over k = 8, 12, ..., 40.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4578
// Full 4x2 tile with a_stride(43), swept over k = 8, 12, ..., 40.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4593
// Every (m, n) with m = 1..4 and n = 1..2, for k = 8, 12, ..., 40; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4612
// n = 3 (the only value in [3, 4)) with m = 4, for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4628
// n = 3 with m = 4 and cn_stride(5), for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(5)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4645
// n = 3 with m = 4 and a_stride(23), for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_strided_a) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4662
// n = 3, k in {1, 6, 11, 16}, m = 1..4; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4681
// n in {4, 6} (multiples of nr = 2) with m = 4, for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4697
// n in {4, 6} with m = 4 and cn_stride(5), for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(5)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4714
// n in {4, 6} with m = 4 and a_stride(23), for k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_strided_a) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4731
// n in {4, 6}, k in {1, 6, 11, 16}, m = 1..4; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4750
// cm_stride(5) over k in {1, 6, 11, 16}, n = 1..2, m = 1..4; one iteration per case.
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4770
// Full 4x2 tile, k = 4, with cm_stride(5).
TEST(F32_GEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cm_stride(5)
    .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4783 #endif // XNN_ARCH_WASMRELAXEDSIMD
4784
4785
4786 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 4x8 tile with k = 1.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4798
// Full 4x8 tile, k = 1, with cn_stride(11).
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4811
// Full 4x8 tile, k = 1, with a_stride(3).
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4824
// Every (m, n) with m = 1..4 and n = 1..8 at k = 1; one iteration per case.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4841
// m = 1..4 with n = 8 and k = 1; one iteration per case.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4856
// n = 1..8 with m = 4 and k = 1; one iteration per case.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4871
// Full 4x8 tile swept over k = 2..9.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4885
// Full 4x8 tile with a_stride(11), swept over k = 2..9.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4900
// Every (m, n) with m = 1..4 and n = 1..8, for k = 2..9; one iteration per case.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4919
// n = 9..15 (larger than nr = 8) with m = 4, for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4935
// n = 9..15 with m = 4 and cn_stride(11), for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4952
// n = 9..15 with m = 4 and a_stride(7), for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4969
// n = 9..15, k in {1, 3, 5}, m = 1..4; one iteration per case.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4988
// n in {16, 24} (multiples of nr = 8) with m = 4, for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5004
// n in {16, 24} with m = 4 and cn_stride(11), for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5021
// n in {16, 24} with m = 4 and a_stride(7), for k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5038
// n in {16, 24}, k in {1, 3, 5}, m = 1..4; one iteration per case.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5057
// cm_stride(11) over k in {1, 3, 5}, n = 1..8, m = 1..4; one iteration per case.
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5077
// Full 4x8 tile, k = 1, with cm_stride(11).
TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
5090 #endif // XNN_ARCH_WASMRELAXEDSIMD
5091
5092
5093 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile with k = 1.
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5105
// Full 5x8 tile, k = 1, with cn_stride(11).
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5118
// Full 5x8 tile, k = 1, with a_stride(3).
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5131
// Every (m, n) with m = 1..5 and n = 1..8 at k = 1; one iteration per case.
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 5; m++) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5148
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  // m swept over [1, 5] with n = 8 and k = 1; iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5163
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  // n swept over [1, 8] with m = 5 and k = 1; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5178
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  // Full 5x8 tile for each k in [2, 10).
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5192
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  // Full 5x8 tile for each k in [2, 10) with a_stride set to 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5207
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  // k in [2, 10) crossed with n in [1, 8] and m in [1, 5];
  // iterations set to 1.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5226
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  // n in [9, 16) crossed with k in {1, 3, 5}, m = 5.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5242
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  // n in [9, 16) crossed with k in {1, 3, 5}, m = 5, cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5259
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  // n in [9, 16) crossed with k in {1, 3, 5}, m = 5, a_stride set to 7.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5276
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  // n in [9, 16) crossed with k in {1, 3, 5} and m in [1, 5];
  // iterations set to 1.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5295
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  // n in {16, 24} crossed with k in {1, 3, 5}, m = 5.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5311
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  // n in {16, 24} crossed with k in {1, 3, 5}, m = 5, cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5328
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  // n in {16, 24} crossed with k in {1, 3, 5}, m = 5, a_stride set to 7.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5345
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24} crossed with k in {1, 3, 5} and m in [1, 5];
  // iterations set to 1.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5364
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5} crossed with n in [1, 8] and m in [1, 5],
  // cm_stride set to 11 and iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5384
TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  // Full 5x8 tile at k = 1 with cm_stride set to 11.
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5397 #endif // XNN_ARCH_WASMRELAXEDSIMD
5398
5399
5400 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  // Full 5x8 tile (m == mr, n == nr) at k = 4, with sr = 4.
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5412
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  // Full 5x8 tile at k = 4 with cn_stride set to 11.
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5425
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  // Full 5x8 tile at k = 4 with a_stride set to 7.
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5438
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  // Every (m, n) pair with m in [1, 5] and n in [1, 8] at k = 4,
  // with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5455
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  // m swept over [1, 5] with n = 8 and k = 4; iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5470
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  // n swept over [1, 8] with m = 5 and k = 4; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5485
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  // Full 5x8 tile for each k in [1, 4).
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5499
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  // Full 5x8 tile for each k in [1, 4) with a_stride set to 7.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5514
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  // k in [1, 4) crossed with n in [1, 8] and m in [1, 5];
  // iterations set to 1.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5533
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  // Full 5x8 tile for each k in [5, 8).
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5547
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  // Full 5x8 tile for each k in [5, 8) with a_stride set to 11.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5562
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  // k in [5, 8) crossed with n in [1, 8] and m in [1, 5];
  // iterations set to 1.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5581
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  // Full 5x8 tile for k = 8, 12, ..., 40 (step 4).
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5595
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  // Full 5x8 tile for k = 8, 12, ..., 40 (step 4) with a_stride set to 43.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5610
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n in [1, 8] and m in [1, 5];
  // iterations set to 1.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5629
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  // n in [9, 16) crossed with k in {1, 6, 11, 16}, m = 5.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5645
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  // n in [9, 16) crossed with k in {1, 6, 11, 16}, m = 5,
  // cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5662
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  // n in [9, 16) crossed with k in {1, 6, 11, 16}, m = 5,
  // a_stride set to 23.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5679
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  // n in [9, 16) crossed with k in {1, 6, 11, 16} and m in [1, 5];
  // iterations set to 1.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5698
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}, m = 5.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5714
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}, m = 5,
  // cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5731
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}, m = 5,
  // a_stride set to 23.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5748
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16} and m in [1, 5];
  // iterations set to 1.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5767
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  // k in {1, 6, 11, 16} crossed with n in [1, 8] and m in [1, 5],
  // cm_stride set to 11 and iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5787
TEST(F32_GEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  // Full 5x8 tile at k = 4 with cm_stride set to 11.
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5800 #endif // XNN_ARCH_WASMRELAXEDSIMD
5801
5802
5803 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  // Full 6x8 tile (m == mr, n == nr) at k = 1.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5815
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  // Full 6x8 tile at k = 1 with cn_stride set to 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5828
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  // Full 6x8 tile at k = 1 with a_stride set to 3.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5841
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  // Every (m, n) pair with m in [1, 6] and n in [1, 8] at k = 1,
  // with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5858
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  // m swept over [1, 6] with n = 8 and k = 1; iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5873
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  // n swept over [1, 8] with m = 6 and k = 1; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5888
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  // Full 6x8 tile for each k in [2, 10).
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5902
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  // Full 6x8 tile for each k in [2, 10) with a_stride set to 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5917
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  // k in [2, 10) crossed with n in [1, 8] and m in [1, 6];
  // iterations set to 1.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5936
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  // n in [9, 16) crossed with k in {1, 3, 5}, m = 6.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5952
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  // n in [9, 16) crossed with k in {1, 3, 5}, m = 6, cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5969
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  // n in [9, 16) crossed with k in {1, 3, 5}, m = 6, a_stride set to 7.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5986
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  // n in [9, 16) crossed with k in {1, 3, 5} and m in [1, 6];
  // iterations set to 1.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
6005
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  // n in {16, 24} crossed with k in {1, 3, 5}, m = 6.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
6021
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  // n in {16, 24} crossed with k in {1, 3, 5}, m = 6, cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
6038
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  // n in {16, 24} crossed with k in {1, 3, 5}, m = 6, a_stride set to 7.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
6055
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24} crossed with k in {1, 3, 5} and m in [1, 6];
  // iterations set to 1.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
6074
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5} crossed with n in [1, 8] and m in [1, 6],
  // cm_stride set to 11 and iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
6094
TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  // Full 6x8 tile at k = 1 with cm_stride set to 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
6107 #endif // XNN_ARCH_WASMRELAXEDSIMD
6108
6109
6110 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  // Full 6x8 tile (m == mr, n == nr) at k = 4, with sr = 4.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6122
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  // Full 6x8 tile at k = 4 with cn_stride set to 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6135
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  // Full 6x8 tile at k = 4 with a_stride set to 7.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6148
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  // Every (m, n) pair with m in [1, 6] and n in [1, 8] at k = 4,
  // with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6165
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  // m swept over [1, 6] with n = 8 and k = 4; iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6180
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  // n swept over [1, 8] with m = 6 and k = 4; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6195
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  // Full 6x8 tile for each k in [1, 4).
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6209
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  // Full 6x8 tile for each k in [1, 4) with a_stride set to 7.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6224
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  // k in [1, 4) crossed with n in [1, 8] and m in [1, 6];
  // iterations set to 1.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6243
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  // Full 6x8 tile for each k in [5, 8).
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6257
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  // Full 6x8 tile for each k in [5, 8) with a_stride set to 11.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6272
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  // k in [5, 8) crossed with n in [1, 8] and m in [1, 6];
  // iterations set to 1.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6291
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  // Full 6x8 tile for k = 8, 12, ..., 40 (step 4).
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6305
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  // Full 6x8 tile for k = 8, 12, ..., 40 (step 4) with a_stride set to 43.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6320
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n in [1, 8] and m in [1, 6];
  // iterations set to 1.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6339
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  // n in [9, 16) crossed with k in {1, 6, 11, 16}, m = 6.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6355
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  // n in [9, 16) crossed with k in {1, 6, 11, 16}, m = 6,
  // cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6372
// m = 6; n in [9, 15]; k in {1, 6, 11, 16}; a_stride set to 23.
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6389
// n in [9, 15]; k in {1, 6, 11, 16}; every m in [1, 6]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6408
// m = 6; n in {16, 24} (multiples of nr = 8); k in {1, 6, 11, 16}.
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6424
// m = 6; n in {16, 24}; k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6441
// m = 6; n in {16, 24}; k in {1, 6, 11, 16}; a_stride set to 23.
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6458
// n in {16, 24}; k in {1, 6, 11, 16}; every m in [1, 6]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6477
// k in {1, 6, 11, 16}; every n in [1, 8] and m in [1, 6]; cm_stride set to 11; iterations(1).
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6497
// Single run: full 6x8 tile, k = 4, cm_stride set to 11.
TEST(F32_GEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6510 #endif // XNN_ARCH_WASMRELAXEDSIMD
6511
6512
6513 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run: full 2x4 tile with k = 1.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
}
6525
// Single run: full 2x4 tile, k = 1, cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
}
6538
// Single run: full 2x4 tile, k = 1, a_stride set to 3.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
}
6551
// k = 1; every n in [1, 4] and m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
6568
// k = 1, n = 4; every m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_val).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
}
6583
// k = 1, m = 2; every n in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
}
6598
// Full 2x4 tile; k in [2, 9].
TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
}
6612
// Full 2x4 tile; k in [2, 9]; a_stride set to 11.
TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1_strided_a) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
}
6627
// k in [2, 9]; every n in [1, 4] and m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
6646
// m = 2; n in [5, 7] (above nr = 4); k in {1, 3, 5}.
TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
6662
// m = 2; n in [5, 7]; k in {1, 3, 5}; cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
6679
// m = 2; n in [5, 7]; k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
6696
// n in [5, 7]; k in {1, 3, 5}; every m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
6715
// m = 2; n in {8, 12} (multiples of nr = 4); k in {1, 3, 5}.
TEST(F32_GEMM_RELU_2X4__WASM, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
6731
// m = 2; n in {8, 12}; k in {1, 3, 5}; cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
6748
// m = 2; n in {8, 12}; k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
6765
// n in {8, 12}; k in {1, 3, 5}; every m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
6784
// k in {1, 3, 5}; every n in [1, 4] and m in [1, 2]; cm_stride set to 7; iterations(1).
TEST(F32_GEMM_RELU_2X4__WASM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
6804
// Single run: full 2x4 tile, k = 1, cm_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
}
6817 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6818
6819
6820 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run: full 4x2 tile with k = 1.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
}
6832
// Single run: full 4x2 tile, k = 1, cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2__WASM, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
}
6845
// Single run: full 4x2 tile, k = 1, a_stride set to 3.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
}
6858
// k = 1; every n in [1, 2] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
6875
// k = 1, n = 2; every m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_val).n(2).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
}
6890
// k = 1, m = 4; every n in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
}
6905
// Full 4x2 tile; k in [2, 9].
TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_val)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
}
6919
// Full 4x2 tile; k in [2, 9]; a_stride set to 11.
TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1_strided_a) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
}
6934
// k in [2, 9]; every n in [1, 2] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
6953
// m = 4; n loop covers only n = 3 (above nr = 2); k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
6969
// m = 4; n loop covers only n = 3; k in {1, 3, 5}; cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
6986
// m = 4; n loop covers only n = 3; k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_strided_a) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
7003
// n loop covers only n = 3; k in {1, 3, 5}; every m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
7022
// m = 4; n in {4, 6} (multiples of nr = 2); k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X2__WASM, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
7038
// m = 4; n in {4, 6}; k in {1, 3, 5}; cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
7055
// m = 4; n in {4, 6}; k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_strided_a) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
7072
// n in {4, 6}; k in {1, 3, 5}; every m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
7091
// k in {1, 3, 5}; every n in [1, 2] and m in [1, 4]; cm_stride set to 5; iterations(1).
TEST(F32_GEMM_RELU_4X2__WASM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
7111
// Single run: full 4x2 tile, k = 1, cm_stride set to 5.
TEST(F32_GEMM_RELU_4X2__WASM, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
}
7124 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
7125
7126
7127 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run: full 4x4 tile with k = 1.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
}
7139
// Single run: full 4x4 tile, k = 1, cn_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
}
7152
// Single run: full 4x4 tile, k = 1, a_stride set to 3.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
}
7165
// k = 1; every n in [1, 4] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
7182
// k = 1, n = 4; every m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_val).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
}
7197
// k = 1, m = 4; every n in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
}
7212
// Full 4x4 tile; k in [2, 9].
TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_val)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
}
7226
// Full 4x4 tile; k in [2, 9]; a_stride set to 11.
TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1_strided_a) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
}
7241
// k in [2, 9]; every n in [1, 4] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
7260
// m = 4; n in [5, 7] (above nr = 4); k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
7276
// m = 4; n in [5, 7]; k in {1, 3, 5}; cn_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
7293
// m = 4; n in [5, 7]; k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
7310
// n in [5, 7]; k in {1, 3, 5}; every m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
7329
// m = 4; n in {8, 12} (multiples of nr = 4); k in {1, 3, 5}.
TEST(F32_GEMM_RELU_4X4__WASM, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
7345
// m = 4; n in {8, 12}; k in {1, 3, 5}; cn_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
7362
// m = 4; n in {8, 12}; k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
7379
// n in {8, 12}; k in {1, 3, 5}; every m in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
7398
// k in {1, 3, 5}; every n in [1, 4] and m in [1, 4]; cm_stride set to 7; iterations(1).
TEST(F32_GEMM_RELU_4X4__WASM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
7418
// Single run: full 4x4 tile, k = 1, cm_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
}
7431 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
7432
7433
// Single run: full 2x4 tile with k = 1.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
}
7445
// Single run: full 2x4 tile, k = 1, cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
}
7458
// Single run: full 2x4 tile, k = 1, a_stride set to 3.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
}
7471
// k = 1; every n in [1, 4] and m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
7488
// k = 1, n = 4; every m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_val).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
  }
}
7503
// k = 1, m = 2; every n in [1, 4]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
  }
}
7518
// Full 2x4 tile; k in [2, 9].
TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
  }
}
7532
// Full 2x4 tile; k in [2, 9]; a_stride set to 11.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1_strided_a) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
  }
}
7547
// k in [2, 9]; every n in [1, 4] and m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7566
// m = 2; n in [5, 7] (above nr = 4); k in {1, 3, 5}.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
7582
// m = 2; n in [5, 7]; k in {1, 3, 5}; cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
7599
// m = 2; n in [5, 7]; k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
7616
// n in [5, 7]; k in {1, 3, 5}; every m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7635
// m = 2; n in {8, 12} (multiples of nr = 4); k in {1, 3, 5}.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
7651
// m = 2; n in {8, 12}; k in {1, 3, 5}; cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
7668
// m = 2; n in {8, 12}; k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
7685
// n in {8, 12}; k in {1, 3, 5}; every m in [1, 2]; iterations(1) per configuration.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7704
// k in {1, 3, 5}; every n in [1, 4] and m in [1, 2]; cm_stride set to 7; iterations(1).
TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7724
// Single run: full 2x4 tile, k = 1, cm_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
}
7737