// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-gemm-relu.yaml
//   Generator: tools/generate-gemm-test.py


#include <gtest/gtest.h>

#include <xnnpack/allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/microparams-init.h>

#include <xnnpack/gemm.h>
#include <xnnpack/igemm.h>
#include <xnnpack/ppmm.h>
#include "gemm-microkernel-tester.h"

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(1)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, strided_cn) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_gt_1) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_gt_8) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_div_8) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_LOADSPLAT, strided_cm) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(4)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cn) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
    for (size_t k = 1; k < 4; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
    for (size_t k = 5; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4_subtile) {
    for (size_t k = 8; k <= 40; k += 4) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_subtile) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm_subtile) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_eq_4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(4)
      .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, strided_cn) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_eq_4_strided_a) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_eq_4_subtile) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_eq_4_subtile_m) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(8)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_eq_4_subtile_n) {
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_lt_4) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_lt_4_strided_a) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_lt_4_subtile) {
    for (size_t k = 1; k < 4; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(4)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_gt_4) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_gt_4_strided_a) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_gt_4_subtile) {
    for (size_t k = 5; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(4)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_div_4) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_div_4_strided_a) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, k_div_4_subtile) {
    for (size_t k = 8; k <= 40; k += 4) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(4)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, n_gt_8) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, n_gt_8_strided_cn) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, n_gt_8_strided_a) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(1)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, n_gt_8_subtile) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(4)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, n_div_8) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, n_div_8_strided_cn) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, n_div_8_strided_a) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(1)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, n_div_8_subtile) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(4)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, strided_cm_subtile) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(8)
            .kr(1)
            .sr(4)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X8S4__WASMSIMD, strided_cm) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8s4__wasmsimd);
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(4)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cn) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
    for (size_t k = 1; k < 4; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
    for (size_t k = 5; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4_subtile) {
    for (size_t k = 8; k <= 40; k += 4) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_subtile) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm_subtile) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(4)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cn) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
    for (uint32_t m = 1; m <= 5; m++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
    for (size_t k = 1; k < 4; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
    for (size_t k = 5; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4_subtile) {
    for (size_t k = 8; k <= 40; k += 4) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_subtile) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm_subtile) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(1)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, strided_cn) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_gt_1) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_gt_8) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_div_8) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMSIMD_LOADSPLAT, strided_cm) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

2253 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMM_RELU_6X8S4__WASMSIMD,k_eq_4)2254 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_eq_4) {
2255 GemmMicrokernelTester()
2256 .mr(6)
2257 .nr(8)
2258 .kr(1)
2259 .sr(4)
2260 .m(6)
2261 .n(8)
2262 .k(4)
2263 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2264 }
2265
TEST(F32_GEMM_RELU_6X8S4__WASMSIMD,strided_cn)2266 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, strided_cn) {
2267 GemmMicrokernelTester()
2268 .mr(6)
2269 .nr(8)
2270 .kr(1)
2271 .sr(4)
2272 .m(6)
2273 .n(8)
2274 .k(4)
2275 .cn_stride(11)
2276 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2277 }
2278
TEST(F32_GEMM_RELU_6X8S4__WASMSIMD,k_eq_4_strided_a)2279 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_eq_4_strided_a) {
2280 GemmMicrokernelTester()
2281 .mr(6)
2282 .nr(8)
2283 .kr(1)
2284 .sr(4)
2285 .m(6)
2286 .n(8)
2287 .k(4)
2288 .a_stride(7)
2289 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2290 }
2291
TEST(F32_GEMM_RELU_6X8S4__WASMSIMD,k_eq_4_subtile)2292 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_eq_4_subtile) {
2293 for (uint32_t n = 1; n <= 8; n++) {
2294 for (uint32_t m = 1; m <= 6; m++) {
2295 GemmMicrokernelTester()
2296 .mr(6)
2297 .nr(8)
2298 .kr(1)
2299 .sr(4)
2300 .m(m)
2301 .n(n)
2302 .k(4)
2303 .iterations(1)
2304 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2305 }
2306 }
2307 }
2308
TEST(F32_GEMM_RELU_6X8S4__WASMSIMD,k_eq_4_subtile_m)2309 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_eq_4_subtile_m) {
2310 for (uint32_t m = 1; m <= 6; m++) {
2311 GemmMicrokernelTester()
2312 .mr(6)
2313 .nr(8)
2314 .kr(1)
2315 .sr(4)
2316 .m(m)
2317 .n(8)
2318 .k(4)
2319 .iterations(1)
2320 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2321 }
2322 }
2323
TEST(F32_GEMM_RELU_6X8S4__WASMSIMD,k_eq_4_subtile_n)2324 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_eq_4_subtile_n) {
2325 for (uint32_t n = 1; n <= 8; n++) {
2326 GemmMicrokernelTester()
2327 .mr(6)
2328 .nr(8)
2329 .kr(1)
2330 .sr(4)
2331 .m(6)
2332 .n(n)
2333 .k(4)
2334 .iterations(1)
2335 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2336 }
2337 }
2338
TEST(F32_GEMM_RELU_6X8S4__WASMSIMD,k_lt_4)2339 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_lt_4) {
2340 for (size_t k = 1; k < 4; k++) {
2341 GemmMicrokernelTester()
2342 .mr(6)
2343 .nr(8)
2344 .kr(1)
2345 .sr(4)
2346 .m(6)
2347 .n(8)
2348 .k(k)
2349 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2350 }
2351 }
2352
TEST(F32_GEMM_RELU_6X8S4__WASMSIMD,k_lt_4_strided_a)2353 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_lt_4_strided_a) {
2354 for (size_t k = 1; k < 4; k++) {
2355 GemmMicrokernelTester()
2356 .mr(6)
2357 .nr(8)
2358 .kr(1)
2359 .sr(4)
2360 .m(6)
2361 .n(8)
2362 .k(k)
2363 .a_stride(7)
2364 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2365 }
2366 }
2367
2368 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_lt_4_subtile) {
2369 for (size_t k = 1; k < 4; k++) {
2370 for (uint32_t n = 1; n <= 8; n++) {
2371 for (uint32_t m = 1; m <= 6; m++) {
2372 GemmMicrokernelTester()
2373 .mr(6)
2374 .nr(8)
2375 .kr(1)
2376 .sr(4)
2377 .m(m)
2378 .n(n)
2379 .k(k)
2380 .iterations(1)
2381 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2382 }
2383 }
2384 }
2385 }
2386
2387 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_gt_4) {
2388 for (size_t k = 5; k < 8; k++) {
2389 GemmMicrokernelTester()
2390 .mr(6)
2391 .nr(8)
2392 .kr(1)
2393 .sr(4)
2394 .m(6)
2395 .n(8)
2396 .k(k)
2397 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2398 }
2399 }
2400
2401 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_gt_4_strided_a) {
2402 for (size_t k = 5; k < 8; k++) {
2403 GemmMicrokernelTester()
2404 .mr(6)
2405 .nr(8)
2406 .kr(1)
2407 .sr(4)
2408 .m(6)
2409 .n(8)
2410 .k(k)
2411 .a_stride(11)
2412 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2413 }
2414 }
2415
2416 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_gt_4_subtile) {
2417 for (size_t k = 5; k < 8; k++) {
2418 for (uint32_t n = 1; n <= 8; n++) {
2419 for (uint32_t m = 1; m <= 6; m++) {
2420 GemmMicrokernelTester()
2421 .mr(6)
2422 .nr(8)
2423 .kr(1)
2424 .sr(4)
2425 .m(m)
2426 .n(n)
2427 .k(k)
2428 .iterations(1)
2429 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2430 }
2431 }
2432 }
2433 }
2434
2435 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_div_4) {
2436 for (size_t k = 8; k <= 40; k += 4) {
2437 GemmMicrokernelTester()
2438 .mr(6)
2439 .nr(8)
2440 .kr(1)
2441 .sr(4)
2442 .m(6)
2443 .n(8)
2444 .k(k)
2445 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2446 }
2447 }
2448
2449 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_div_4_strided_a) {
2450 for (size_t k = 8; k <= 40; k += 4) {
2451 GemmMicrokernelTester()
2452 .mr(6)
2453 .nr(8)
2454 .kr(1)
2455 .sr(4)
2456 .m(6)
2457 .n(8)
2458 .k(k)
2459 .a_stride(43)
2460 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2461 }
2462 }
2463
2464 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, k_div_4_subtile) {
2465 for (size_t k = 8; k <= 40; k += 4) {
2466 for (uint32_t n = 1; n <= 8; n++) {
2467 for (uint32_t m = 1; m <= 6; m++) {
2468 GemmMicrokernelTester()
2469 .mr(6)
2470 .nr(8)
2471 .kr(1)
2472 .sr(4)
2473 .m(m)
2474 .n(n)
2475 .k(k)
2476 .iterations(1)
2477 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2478 }
2479 }
2480 }
2481 }
2482
2483 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, n_gt_8) {
2484 for (uint32_t n = 9; n < 16; n++) {
2485 for (size_t k = 1; k <= 20; k += 5) {
2486 GemmMicrokernelTester()
2487 .mr(6)
2488 .nr(8)
2489 .kr(1)
2490 .sr(4)
2491 .m(6)
2492 .n(n)
2493 .k(k)
2494 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2495 }
2496 }
2497 }
2498
2499 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, n_gt_8_strided_cn) {
2500 for (uint32_t n = 9; n < 16; n++) {
2501 for (size_t k = 1; k <= 20; k += 5) {
2502 GemmMicrokernelTester()
2503 .mr(6)
2504 .nr(8)
2505 .kr(1)
2506 .sr(4)
2507 .m(6)
2508 .n(n)
2509 .k(k)
2510 .cn_stride(11)
2511 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2512 }
2513 }
2514 }
2515
2516 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, n_gt_8_strided_a) {
2517 for (uint32_t n = 9; n < 16; n++) {
2518 for (size_t k = 1; k <= 20; k += 5) {
2519 GemmMicrokernelTester()
2520 .mr(6)
2521 .nr(8)
2522 .kr(1)
2523 .sr(4)
2524 .m(6)
2525 .n(n)
2526 .k(k)
2527 .a_stride(23)
2528 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2529 }
2530 }
2531 }
2532
2533 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, n_gt_8_subtile) {
2534 for (uint32_t n = 9; n < 16; n++) {
2535 for (size_t k = 1; k <= 20; k += 5) {
2536 for (uint32_t m = 1; m <= 6; m++) {
2537 GemmMicrokernelTester()
2538 .mr(6)
2539 .nr(8)
2540 .kr(1)
2541 .sr(4)
2542 .m(m)
2543 .n(n)
2544 .k(k)
2545 .iterations(1)
2546 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2547 }
2548 }
2549 }
2550 }
2551
2552 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, n_div_8) {
2553 for (uint32_t n = 16; n <= 24; n += 8) {
2554 for (size_t k = 1; k <= 20; k += 5) {
2555 GemmMicrokernelTester()
2556 .mr(6)
2557 .nr(8)
2558 .kr(1)
2559 .sr(4)
2560 .m(6)
2561 .n(n)
2562 .k(k)
2563 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2564 }
2565 }
2566 }
2567
2568 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, n_div_8_strided_cn) {
2569 for (uint32_t n = 16; n <= 24; n += 8) {
2570 for (size_t k = 1; k <= 20; k += 5) {
2571 GemmMicrokernelTester()
2572 .mr(6)
2573 .nr(8)
2574 .kr(1)
2575 .sr(4)
2576 .m(6)
2577 .n(n)
2578 .k(k)
2579 .cn_stride(11)
2580 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2581 }
2582 }
2583 }
2584
2585 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, n_div_8_strided_a) {
2586 for (uint32_t n = 16; n <= 24; n += 8) {
2587 for (size_t k = 1; k <= 20; k += 5) {
2588 GemmMicrokernelTester()
2589 .mr(6)
2590 .nr(8)
2591 .kr(1)
2592 .sr(4)
2593 .m(6)
2594 .n(n)
2595 .k(k)
2596 .a_stride(23)
2597 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2598 }
2599 }
2600 }
2601
2602 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, n_div_8_subtile) {
2603 for (uint32_t n = 16; n <= 24; n += 8) {
2604 for (size_t k = 1; k <= 20; k += 5) {
2605 for (uint32_t m = 1; m <= 6; m++) {
2606 GemmMicrokernelTester()
2607 .mr(6)
2608 .nr(8)
2609 .kr(1)
2610 .sr(4)
2611 .m(m)
2612 .n(n)
2613 .k(k)
2614 .iterations(1)
2615 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2616 }
2617 }
2618 }
2619 }
2620
2621 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, strided_cm_subtile) {
2622 for (size_t k = 1; k <= 20; k += 5) {
2623 for (uint32_t n = 1; n <= 8; n++) {
2624 for (uint32_t m = 1; m <= 6; m++) {
2625 GemmMicrokernelTester()
2626 .mr(6)
2627 .nr(8)
2628 .kr(1)
2629 .sr(4)
2630 .m(m)
2631 .n(n)
2632 .k(k)
2633 .cm_stride(11)
2634 .iterations(1)
2635 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2636 }
2637 }
2638 }
2639 }
2640
2641 TEST(F32_GEMM_RELU_6X8S4__WASMSIMD, strided_cm) {
2642 GemmMicrokernelTester()
2643 .mr(6)
2644 .nr(8)
2645 .kr(1)
2646 .sr(4)
2647 .m(6)
2648 .n(8)
2649 .k(4)
2650 .cm_stride(11)
2651 .Test(xnn_f32_gemm_relu_ukernel_6x8s4__wasmsimd);
2652 }
2653 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2654
2655
2656 #if XNN_ARCH_WASMRELAXEDSIMD
2657 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
2658 GemmMicrokernelTester()
2659 .mr(1)
2660 .nr(8)
2661 .kr(1)
2662 .sr(1)
2663 .m(1)
2664 .n(8)
2665 .k(4)
2666 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2667 }
2668
2669 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
2670 GemmMicrokernelTester()
2671 .mr(1)
2672 .nr(8)
2673 .kr(1)
2674 .sr(1)
2675 .m(1)
2676 .n(8)
2677 .k(4)
2678 .cn_stride(11)
2679 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2680 }
2681
2682 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
2683 GemmMicrokernelTester()
2684 .mr(1)
2685 .nr(8)
2686 .kr(1)
2687 .sr(1)
2688 .m(1)
2689 .n(8)
2690 .k(4)
2691 .a_stride(7)
2692 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2693 }
2694
2695 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
2696 for (uint32_t n = 1; n <= 8; n++) {
2697 for (uint32_t m = 1; m <= 1; m++) {
2698 GemmMicrokernelTester()
2699 .mr(1)
2700 .nr(8)
2701 .kr(1)
2702 .sr(1)
2703 .m(m)
2704 .n(n)
2705 .k(4)
2706 .iterations(1)
2707 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2708 }
2709 }
2710 }
2711
2712 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
2713 for (uint32_t m = 1; m <= 1; m++) {
2714 GemmMicrokernelTester()
2715 .mr(1)
2716 .nr(8)
2717 .kr(1)
2718 .sr(1)
2719 .m(m)
2720 .n(8)
2721 .k(4)
2722 .iterations(1)
2723 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2724 }
2725 }
2726
2727 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
2728 for (uint32_t n = 1; n <= 8; n++) {
2729 GemmMicrokernelTester()
2730 .mr(1)
2731 .nr(8)
2732 .kr(1)
2733 .sr(1)
2734 .m(1)
2735 .n(n)
2736 .k(4)
2737 .iterations(1)
2738 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2739 }
2740 }
2741
2742 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
2743 for (size_t k = 1; k < 4; k++) {
2744 GemmMicrokernelTester()
2745 .mr(1)
2746 .nr(8)
2747 .kr(1)
2748 .sr(1)
2749 .m(1)
2750 .n(8)
2751 .k(k)
2752 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2753 }
2754 }
2755
2756 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
2757 for (size_t k = 1; k < 4; k++) {
2758 GemmMicrokernelTester()
2759 .mr(1)
2760 .nr(8)
2761 .kr(1)
2762 .sr(1)
2763 .m(1)
2764 .n(8)
2765 .k(k)
2766 .a_stride(7)
2767 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2768 }
2769 }
2770
2771 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
2772 for (size_t k = 1; k < 4; k++) {
2773 for (uint32_t n = 1; n <= 8; n++) {
2774 for (uint32_t m = 1; m <= 1; m++) {
2775 GemmMicrokernelTester()
2776 .mr(1)
2777 .nr(8)
2778 .kr(1)
2779 .sr(1)
2780 .m(m)
2781 .n(n)
2782 .k(k)
2783 .iterations(1)
2784 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2785 }
2786 }
2787 }
2788 }
2789
2790 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
2791 for (size_t k = 5; k < 8; k++) {
2792 GemmMicrokernelTester()
2793 .mr(1)
2794 .nr(8)
2795 .kr(1)
2796 .sr(1)
2797 .m(1)
2798 .n(8)
2799 .k(k)
2800 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2801 }
2802 }
2803
2804 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
2805 for (size_t k = 5; k < 8; k++) {
2806 GemmMicrokernelTester()
2807 .mr(1)
2808 .nr(8)
2809 .kr(1)
2810 .sr(1)
2811 .m(1)
2812 .n(8)
2813 .k(k)
2814 .a_stride(11)
2815 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2816 }
2817 }
2818
2819 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
2820 for (size_t k = 5; k < 8; k++) {
2821 for (uint32_t n = 1; n <= 8; n++) {
2822 for (uint32_t m = 1; m <= 1; m++) {
2823 GemmMicrokernelTester()
2824 .mr(1)
2825 .nr(8)
2826 .kr(1)
2827 .sr(1)
2828 .m(m)
2829 .n(n)
2830 .k(k)
2831 .iterations(1)
2832 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2833 }
2834 }
2835 }
2836 }
2837
2838 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
2839 for (size_t k = 8; k <= 40; k += 4) {
2840 GemmMicrokernelTester()
2841 .mr(1)
2842 .nr(8)
2843 .kr(1)
2844 .sr(1)
2845 .m(1)
2846 .n(8)
2847 .k(k)
2848 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2849 }
2850 }
2851
2852 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
2853 for (size_t k = 8; k <= 40; k += 4) {
2854 GemmMicrokernelTester()
2855 .mr(1)
2856 .nr(8)
2857 .kr(1)
2858 .sr(1)
2859 .m(1)
2860 .n(8)
2861 .k(k)
2862 .a_stride(43)
2863 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2864 }
2865 }
2866
2867 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
2868 for (size_t k = 8; k <= 40; k += 4) {
2869 for (uint32_t n = 1; n <= 8; n++) {
2870 for (uint32_t m = 1; m <= 1; m++) {
2871 GemmMicrokernelTester()
2872 .mr(1)
2873 .nr(8)
2874 .kr(1)
2875 .sr(1)
2876 .m(m)
2877 .n(n)
2878 .k(k)
2879 .iterations(1)
2880 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2881 }
2882 }
2883 }
2884 }
2885
2886 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
2887 for (uint32_t n = 9; n < 16; n++) {
2888 for (size_t k = 1; k <= 20; k += 5) {
2889 GemmMicrokernelTester()
2890 .mr(1)
2891 .nr(8)
2892 .kr(1)
2893 .sr(1)
2894 .m(1)
2895 .n(n)
2896 .k(k)
2897 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2898 }
2899 }
2900 }
2901
2902 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
2903 for (uint32_t n = 9; n < 16; n++) {
2904 for (size_t k = 1; k <= 20; k += 5) {
2905 GemmMicrokernelTester()
2906 .mr(1)
2907 .nr(8)
2908 .kr(1)
2909 .sr(1)
2910 .m(1)
2911 .n(n)
2912 .k(k)
2913 .cn_stride(11)
2914 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2915 }
2916 }
2917 }
2918
2919 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
2920 for (uint32_t n = 9; n < 16; n++) {
2921 for (size_t k = 1; k <= 20; k += 5) {
2922 GemmMicrokernelTester()
2923 .mr(1)
2924 .nr(8)
2925 .kr(1)
2926 .sr(1)
2927 .m(1)
2928 .n(n)
2929 .k(k)
2930 .a_stride(23)
2931 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2932 }
2933 }
2934 }
2935
2936 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
2937 for (uint32_t n = 9; n < 16; n++) {
2938 for (size_t k = 1; k <= 20; k += 5) {
2939 for (uint32_t m = 1; m <= 1; m++) {
2940 GemmMicrokernelTester()
2941 .mr(1)
2942 .nr(8)
2943 .kr(1)
2944 .sr(1)
2945 .m(m)
2946 .n(n)
2947 .k(k)
2948 .iterations(1)
2949 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2950 }
2951 }
2952 }
2953 }
2954
2955 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
2956 for (uint32_t n = 16; n <= 24; n += 8) {
2957 for (size_t k = 1; k <= 20; k += 5) {
2958 GemmMicrokernelTester()
2959 .mr(1)
2960 .nr(8)
2961 .kr(1)
2962 .sr(1)
2963 .m(1)
2964 .n(n)
2965 .k(k)
2966 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2967 }
2968 }
2969 }
2970
2971 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
2972 for (uint32_t n = 16; n <= 24; n += 8) {
2973 for (size_t k = 1; k <= 20; k += 5) {
2974 GemmMicrokernelTester()
2975 .mr(1)
2976 .nr(8)
2977 .kr(1)
2978 .sr(1)
2979 .m(1)
2980 .n(n)
2981 .k(k)
2982 .cn_stride(11)
2983 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
2984 }
2985 }
2986 }
2987
2988 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
2989 for (uint32_t n = 16; n <= 24; n += 8) {
2990 for (size_t k = 1; k <= 20; k += 5) {
2991 GemmMicrokernelTester()
2992 .mr(1)
2993 .nr(8)
2994 .kr(1)
2995 .sr(1)
2996 .m(1)
2997 .n(n)
2998 .k(k)
2999 .a_stride(23)
3000 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
3001 }
3002 }
3003 }
3004
3005 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
3006 for (uint32_t n = 16; n <= 24; n += 8) {
3007 for (size_t k = 1; k <= 20; k += 5) {
3008 for (uint32_t m = 1; m <= 1; m++) {
3009 GemmMicrokernelTester()
3010 .mr(1)
3011 .nr(8)
3012 .kr(1)
3013 .sr(1)
3014 .m(m)
3015 .n(n)
3016 .k(k)
3017 .iterations(1)
3018 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
3019 }
3020 }
3021 }
3022 }
3023
3024 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
3025 for (size_t k = 1; k <= 20; k += 5) {
3026 for (uint32_t n = 1; n <= 8; n++) {
3027 for (uint32_t m = 1; m <= 1; m++) {
3028 GemmMicrokernelTester()
3029 .mr(1)
3030 .nr(8)
3031 .kr(1)
3032 .sr(1)
3033 .m(m)
3034 .n(n)
3035 .k(k)
3036 .cm_stride(11)
3037 .iterations(1)
3038 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
3039 }
3040 }
3041 }
3042 }
3043
3044 TEST(F32_GEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
3045 GemmMicrokernelTester()
3046 .mr(1)
3047 .nr(8)
3048 .kr(1)
3049 .sr(1)
3050 .m(1)
3051 .n(8)
3052 .k(4)
3053 .cm_stride(11)
3054 .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
3055 }
3056 #endif // XNN_ARCH_WASMRELAXEDSIMD
3057
3058
3059 #if XNN_ARCH_WASMRELAXEDSIMD
3060 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
3061 GemmMicrokernelTester()
3062 .mr(3)
3063 .nr(8)
3064 .kr(1)
3065 .sr(1)
3066 .m(3)
3067 .n(8)
3068 .k(4)
3069 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3070 }
3071
3072 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
3073 GemmMicrokernelTester()
3074 .mr(3)
3075 .nr(8)
3076 .kr(1)
3077 .sr(1)
3078 .m(3)
3079 .n(8)
3080 .k(4)
3081 .cn_stride(11)
3082 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3083 }
3084
3085 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
3086 GemmMicrokernelTester()
3087 .mr(3)
3088 .nr(8)
3089 .kr(1)
3090 .sr(1)
3091 .m(3)
3092 .n(8)
3093 .k(4)
3094 .a_stride(7)
3095 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3096 }
3097
3098 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
3099 for (uint32_t n = 1; n <= 8; n++) {
3100 for (uint32_t m = 1; m <= 3; m++) {
3101 GemmMicrokernelTester()
3102 .mr(3)
3103 .nr(8)
3104 .kr(1)
3105 .sr(1)
3106 .m(m)
3107 .n(n)
3108 .k(4)
3109 .iterations(1)
3110 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3111 }
3112 }
3113 }
3114
3115 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
3116 for (uint32_t m = 1; m <= 3; m++) {
3117 GemmMicrokernelTester()
3118 .mr(3)
3119 .nr(8)
3120 .kr(1)
3121 .sr(1)
3122 .m(m)
3123 .n(8)
3124 .k(4)
3125 .iterations(1)
3126 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3127 }
3128 }
3129
3130 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
3131 for (uint32_t n = 1; n <= 8; n++) {
3132 GemmMicrokernelTester()
3133 .mr(3)
3134 .nr(8)
3135 .kr(1)
3136 .sr(1)
3137 .m(3)
3138 .n(n)
3139 .k(4)
3140 .iterations(1)
3141 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3142 }
3143 }
3144
3145 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
3146 for (size_t k = 1; k < 4; k++) {
3147 GemmMicrokernelTester()
3148 .mr(3)
3149 .nr(8)
3150 .kr(1)
3151 .sr(1)
3152 .m(3)
3153 .n(8)
3154 .k(k)
3155 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3156 }
3157 }
3158
3159 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
3160 for (size_t k = 1; k < 4; k++) {
3161 GemmMicrokernelTester()
3162 .mr(3)
3163 .nr(8)
3164 .kr(1)
3165 .sr(1)
3166 .m(3)
3167 .n(8)
3168 .k(k)
3169 .a_stride(7)
3170 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3171 }
3172 }
3173
3174 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
3175 for (size_t k = 1; k < 4; k++) {
3176 for (uint32_t n = 1; n <= 8; n++) {
3177 for (uint32_t m = 1; m <= 3; m++) {
3178 GemmMicrokernelTester()
3179 .mr(3)
3180 .nr(8)
3181 .kr(1)
3182 .sr(1)
3183 .m(m)
3184 .n(n)
3185 .k(k)
3186 .iterations(1)
3187 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3188 }
3189 }
3190 }
3191 }
3192
3193 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
3194 for (size_t k = 5; k < 8; k++) {
3195 GemmMicrokernelTester()
3196 .mr(3)
3197 .nr(8)
3198 .kr(1)
3199 .sr(1)
3200 .m(3)
3201 .n(8)
3202 .k(k)
3203 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3204 }
3205 }
3206
3207 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
3208 for (size_t k = 5; k < 8; k++) {
3209 GemmMicrokernelTester()
3210 .mr(3)
3211 .nr(8)
3212 .kr(1)
3213 .sr(1)
3214 .m(3)
3215 .n(8)
3216 .k(k)
3217 .a_stride(11)
3218 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3219 }
3220 }
3221
3222 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
3223 for (size_t k = 5; k < 8; k++) {
3224 for (uint32_t n = 1; n <= 8; n++) {
3225 for (uint32_t m = 1; m <= 3; m++) {
3226 GemmMicrokernelTester()
3227 .mr(3)
3228 .nr(8)
3229 .kr(1)
3230 .sr(1)
3231 .m(m)
3232 .n(n)
3233 .k(k)
3234 .iterations(1)
3235 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3236 }
3237 }
3238 }
3239 }
3240
3241 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
3242 for (size_t k = 8; k <= 40; k += 4) {
3243 GemmMicrokernelTester()
3244 .mr(3)
3245 .nr(8)
3246 .kr(1)
3247 .sr(1)
3248 .m(3)
3249 .n(8)
3250 .k(k)
3251 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3252 }
3253 }
3254
3255 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
3256 for (size_t k = 8; k <= 40; k += 4) {
3257 GemmMicrokernelTester()
3258 .mr(3)
3259 .nr(8)
3260 .kr(1)
3261 .sr(1)
3262 .m(3)
3263 .n(8)
3264 .k(k)
3265 .a_stride(43)
3266 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3267 }
3268 }
3269
3270 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
3271 for (size_t k = 8; k <= 40; k += 4) {
3272 for (uint32_t n = 1; n <= 8; n++) {
3273 for (uint32_t m = 1; m <= 3; m++) {
3274 GemmMicrokernelTester()
3275 .mr(3)
3276 .nr(8)
3277 .kr(1)
3278 .sr(1)
3279 .m(m)
3280 .n(n)
3281 .k(k)
3282 .iterations(1)
3283 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3284 }
3285 }
3286 }
3287 }
3288
3289 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
3290 for (uint32_t n = 9; n < 16; n++) {
3291 for (size_t k = 1; k <= 20; k += 5) {
3292 GemmMicrokernelTester()
3293 .mr(3)
3294 .nr(8)
3295 .kr(1)
3296 .sr(1)
3297 .m(3)
3298 .n(n)
3299 .k(k)
3300 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3301 }
3302 }
3303 }
3304
3305 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
3306 for (uint32_t n = 9; n < 16; n++) {
3307 for (size_t k = 1; k <= 20; k += 5) {
3308 GemmMicrokernelTester()
3309 .mr(3)
3310 .nr(8)
3311 .kr(1)
3312 .sr(1)
3313 .m(3)
3314 .n(n)
3315 .k(k)
3316 .cn_stride(11)
3317 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3318 }
3319 }
3320 }
3321
3322 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
3323 for (uint32_t n = 9; n < 16; n++) {
3324 for (size_t k = 1; k <= 20; k += 5) {
3325 GemmMicrokernelTester()
3326 .mr(3)
3327 .nr(8)
3328 .kr(1)
3329 .sr(1)
3330 .m(3)
3331 .n(n)
3332 .k(k)
3333 .a_stride(23)
3334 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3335 }
3336 }
3337 }
3338
3339 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
3340 for (uint32_t n = 9; n < 16; n++) {
3341 for (size_t k = 1; k <= 20; k += 5) {
3342 for (uint32_t m = 1; m <= 3; m++) {
3343 GemmMicrokernelTester()
3344 .mr(3)
3345 .nr(8)
3346 .kr(1)
3347 .sr(1)
3348 .m(m)
3349 .n(n)
3350 .k(k)
3351 .iterations(1)
3352 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3353 }
3354 }
3355 }
3356 }
3357
3358 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
3359 for (uint32_t n = 16; n <= 24; n += 8) {
3360 for (size_t k = 1; k <= 20; k += 5) {
3361 GemmMicrokernelTester()
3362 .mr(3)
3363 .nr(8)
3364 .kr(1)
3365 .sr(1)
3366 .m(3)
3367 .n(n)
3368 .k(k)
3369 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3370 }
3371 }
3372 }
3373
3374 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
3375 for (uint32_t n = 16; n <= 24; n += 8) {
3376 for (size_t k = 1; k <= 20; k += 5) {
3377 GemmMicrokernelTester()
3378 .mr(3)
3379 .nr(8)
3380 .kr(1)
3381 .sr(1)
3382 .m(3)
3383 .n(n)
3384 .k(k)
3385 .cn_stride(11)
3386 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3387 }
3388 }
3389 }
3390
3391 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
3392 for (uint32_t n = 16; n <= 24; n += 8) {
3393 for (size_t k = 1; k <= 20; k += 5) {
3394 GemmMicrokernelTester()
3395 .mr(3)
3396 .nr(8)
3397 .kr(1)
3398 .sr(1)
3399 .m(3)
3400 .n(n)
3401 .k(k)
3402 .a_stride(23)
3403 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3404 }
3405 }
3406 }
3407
3408 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
3409 for (uint32_t n = 16; n <= 24; n += 8) {
3410 for (size_t k = 1; k <= 20; k += 5) {
3411 for (uint32_t m = 1; m <= 3; m++) {
3412 GemmMicrokernelTester()
3413 .mr(3)
3414 .nr(8)
3415 .kr(1)
3416 .sr(1)
3417 .m(m)
3418 .n(n)
3419 .k(k)
3420 .iterations(1)
3421 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3422 }
3423 }
3424 }
3425 }
3426
3427 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
3428 for (size_t k = 1; k <= 20; k += 5) {
3429 for (uint32_t n = 1; n <= 8; n++) {
3430 for (uint32_t m = 1; m <= 3; m++) {
3431 GemmMicrokernelTester()
3432 .mr(3)
3433 .nr(8)
3434 .kr(1)
3435 .sr(1)
3436 .m(m)
3437 .n(n)
3438 .k(k)
3439 .cm_stride(11)
3440 .iterations(1)
3441 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3442 }
3443 }
3444 }
3445 }
3446
3447 TEST(F32_GEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
3448 GemmMicrokernelTester()
3449 .mr(3)
3450 .nr(8)
3451 .kr(1)
3452 .sr(1)
3453 .m(3)
3454 .n(8)
3455 .k(4)
3456 .cm_stride(11)
3457 .Test(xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
3458 }
3459 #endif // XNN_ARCH_WASMRELAXEDSIMD
3460
3461
3462 #if XNN_ARCH_WASMRELAXEDSIMD
3463 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
3464 GemmMicrokernelTester()
3465 .mr(3)
3466 .nr(8)
3467 .kr(1)
3468 .sr(4)
3469 .m(3)
3470 .n(8)
3471 .k(4)
3472 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3473 }
3474
3475 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
3476 GemmMicrokernelTester()
3477 .mr(3)
3478 .nr(8)
3479 .kr(1)
3480 .sr(4)
3481 .m(3)
3482 .n(8)
3483 .k(4)
3484 .cn_stride(11)
3485 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3486 }
3487
3488 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
3489 GemmMicrokernelTester()
3490 .mr(3)
3491 .nr(8)
3492 .kr(1)
3493 .sr(4)
3494 .m(3)
3495 .n(8)
3496 .k(4)
3497 .a_stride(7)
3498 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3499 }
3500
3501 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
3502 for (uint32_t n = 1; n <= 8; n++) {
3503 for (uint32_t m = 1; m <= 3; m++) {
3504 GemmMicrokernelTester()
3505 .mr(3)
3506 .nr(8)
3507 .kr(1)
3508 .sr(4)
3509 .m(m)
3510 .n(n)
3511 .k(4)
3512 .iterations(1)
3513 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3514 }
3515 }
3516 }
3517
3518 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
3519 for (uint32_t m = 1; m <= 3; m++) {
3520 GemmMicrokernelTester()
3521 .mr(3)
3522 .nr(8)
3523 .kr(1)
3524 .sr(4)
3525 .m(m)
3526 .n(8)
3527 .k(4)
3528 .iterations(1)
3529 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3530 }
3531 }
3532
3533 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
3534 for (uint32_t n = 1; n <= 8; n++) {
3535 GemmMicrokernelTester()
3536 .mr(3)
3537 .nr(8)
3538 .kr(1)
3539 .sr(4)
3540 .m(3)
3541 .n(n)
3542 .k(4)
3543 .iterations(1)
3544 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3545 }
3546 }
3547
3548 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
3549 for (size_t k = 1; k < 4; k++) {
3550 GemmMicrokernelTester()
3551 .mr(3)
3552 .nr(8)
3553 .kr(1)
3554 .sr(4)
3555 .m(3)
3556 .n(8)
3557 .k(k)
3558 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3559 }
3560 }
3561
3562 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
3563 for (size_t k = 1; k < 4; k++) {
3564 GemmMicrokernelTester()
3565 .mr(3)
3566 .nr(8)
3567 .kr(1)
3568 .sr(4)
3569 .m(3)
3570 .n(8)
3571 .k(k)
3572 .a_stride(7)
3573 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3574 }
3575 }
3576
3577 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
3578 for (size_t k = 1; k < 4; k++) {
3579 for (uint32_t n = 1; n <= 8; n++) {
3580 for (uint32_t m = 1; m <= 3; m++) {
3581 GemmMicrokernelTester()
3582 .mr(3)
3583 .nr(8)
3584 .kr(1)
3585 .sr(4)
3586 .m(m)
3587 .n(n)
3588 .k(k)
3589 .iterations(1)
3590 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3591 }
3592 }
3593 }
3594 }
3595
3596 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
3597 for (size_t k = 5; k < 8; k++) {
3598 GemmMicrokernelTester()
3599 .mr(3)
3600 .nr(8)
3601 .kr(1)
3602 .sr(4)
3603 .m(3)
3604 .n(8)
3605 .k(k)
3606 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3607 }
3608 }
3609
3610 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
3611 for (size_t k = 5; k < 8; k++) {
3612 GemmMicrokernelTester()
3613 .mr(3)
3614 .nr(8)
3615 .kr(1)
3616 .sr(4)
3617 .m(3)
3618 .n(8)
3619 .k(k)
3620 .a_stride(11)
3621 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3622 }
3623 }
3624
3625 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
3626 for (size_t k = 5; k < 8; k++) {
3627 for (uint32_t n = 1; n <= 8; n++) {
3628 for (uint32_t m = 1; m <= 3; m++) {
3629 GemmMicrokernelTester()
3630 .mr(3)
3631 .nr(8)
3632 .kr(1)
3633 .sr(4)
3634 .m(m)
3635 .n(n)
3636 .k(k)
3637 .iterations(1)
3638 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3639 }
3640 }
3641 }
3642 }
3643
3644 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
3645 for (size_t k = 8; k <= 40; k += 4) {
3646 GemmMicrokernelTester()
3647 .mr(3)
3648 .nr(8)
3649 .kr(1)
3650 .sr(4)
3651 .m(3)
3652 .n(8)
3653 .k(k)
3654 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3655 }
3656 }
3657
3658 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
3659 for (size_t k = 8; k <= 40; k += 4) {
3660 GemmMicrokernelTester()
3661 .mr(3)
3662 .nr(8)
3663 .kr(1)
3664 .sr(4)
3665 .m(3)
3666 .n(8)
3667 .k(k)
3668 .a_stride(43)
3669 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3670 }
3671 }
3672
3673 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
3674 for (size_t k = 8; k <= 40; k += 4) {
3675 for (uint32_t n = 1; n <= 8; n++) {
3676 for (uint32_t m = 1; m <= 3; m++) {
3677 GemmMicrokernelTester()
3678 .mr(3)
3679 .nr(8)
3680 .kr(1)
3681 .sr(4)
3682 .m(m)
3683 .n(n)
3684 .k(k)
3685 .iterations(1)
3686 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3687 }
3688 }
3689 }
3690 }
3691
3692 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
3693 for (uint32_t n = 9; n < 16; n++) {
3694 for (size_t k = 1; k <= 20; k += 5) {
3695 GemmMicrokernelTester()
3696 .mr(3)
3697 .nr(8)
3698 .kr(1)
3699 .sr(4)
3700 .m(3)
3701 .n(n)
3702 .k(k)
3703 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3704 }
3705 }
3706 }
3707
3708 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
3709 for (uint32_t n = 9; n < 16; n++) {
3710 for (size_t k = 1; k <= 20; k += 5) {
3711 GemmMicrokernelTester()
3712 .mr(3)
3713 .nr(8)
3714 .kr(1)
3715 .sr(4)
3716 .m(3)
3717 .n(n)
3718 .k(k)
3719 .cn_stride(11)
3720 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3721 }
3722 }
3723 }
3724
3725 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
3726 for (uint32_t n = 9; n < 16; n++) {
3727 for (size_t k = 1; k <= 20; k += 5) {
3728 GemmMicrokernelTester()
3729 .mr(3)
3730 .nr(8)
3731 .kr(1)
3732 .sr(4)
3733 .m(3)
3734 .n(n)
3735 .k(k)
3736 .a_stride(23)
3737 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3738 }
3739 }
3740 }
3741
3742 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
3743 for (uint32_t n = 9; n < 16; n++) {
3744 for (size_t k = 1; k <= 20; k += 5) {
3745 for (uint32_t m = 1; m <= 3; m++) {
3746 GemmMicrokernelTester()
3747 .mr(3)
3748 .nr(8)
3749 .kr(1)
3750 .sr(4)
3751 .m(m)
3752 .n(n)
3753 .k(k)
3754 .iterations(1)
3755 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3756 }
3757 }
3758 }
3759 }
3760
3761 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
3762 for (uint32_t n = 16; n <= 24; n += 8) {
3763 for (size_t k = 1; k <= 20; k += 5) {
3764 GemmMicrokernelTester()
3765 .mr(3)
3766 .nr(8)
3767 .kr(1)
3768 .sr(4)
3769 .m(3)
3770 .n(n)
3771 .k(k)
3772 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3773 }
3774 }
3775 }
3776
3777 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
3778 for (uint32_t n = 16; n <= 24; n += 8) {
3779 for (size_t k = 1; k <= 20; k += 5) {
3780 GemmMicrokernelTester()
3781 .mr(3)
3782 .nr(8)
3783 .kr(1)
3784 .sr(4)
3785 .m(3)
3786 .n(n)
3787 .k(k)
3788 .cn_stride(11)
3789 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3790 }
3791 }
3792 }
3793
3794 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
3795 for (uint32_t n = 16; n <= 24; n += 8) {
3796 for (size_t k = 1; k <= 20; k += 5) {
3797 GemmMicrokernelTester()
3798 .mr(3)
3799 .nr(8)
3800 .kr(1)
3801 .sr(4)
3802 .m(3)
3803 .n(n)
3804 .k(k)
3805 .a_stride(23)
3806 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3807 }
3808 }
3809 }
3810
3811 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
3812 for (uint32_t n = 16; n <= 24; n += 8) {
3813 for (size_t k = 1; k <= 20; k += 5) {
3814 for (uint32_t m = 1; m <= 3; m++) {
3815 GemmMicrokernelTester()
3816 .mr(3)
3817 .nr(8)
3818 .kr(1)
3819 .sr(4)
3820 .m(m)
3821 .n(n)
3822 .k(k)
3823 .iterations(1)
3824 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3825 }
3826 }
3827 }
3828 }
3829
3830 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
3831 for (size_t k = 1; k <= 20; k += 5) {
3832 for (uint32_t n = 1; n <= 8; n++) {
3833 for (uint32_t m = 1; m <= 3; m++) {
3834 GemmMicrokernelTester()
3835 .mr(3)
3836 .nr(8)
3837 .kr(1)
3838 .sr(4)
3839 .m(m)
3840 .n(n)
3841 .k(k)
3842 .cm_stride(11)
3843 .iterations(1)
3844 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3845 }
3846 }
3847 }
3848 }
3849
3850 TEST(F32_GEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
3851 GemmMicrokernelTester()
3852 .mr(3)
3853 .nr(8)
3854 .kr(1)
3855 .sr(4)
3856 .m(3)
3857 .n(8)
3858 .k(4)
3859 .cm_stride(11)
3860 .Test(xnn_f32_gemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
3861 }
3862 #endif // XNN_ARCH_WASMRELAXEDSIMD
3863
3864
3865 #if XNN_ARCH_WASMRELAXEDSIMD
3866 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
3867 GemmMicrokernelTester()
3868 .mr(4)
3869 .nr(8)
3870 .kr(1)
3871 .sr(1)
3872 .m(4)
3873 .n(8)
3874 .k(4)
3875 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3876 }
3877
3878 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
3879 GemmMicrokernelTester()
3880 .mr(4)
3881 .nr(8)
3882 .kr(1)
3883 .sr(1)
3884 .m(4)
3885 .n(8)
3886 .k(4)
3887 .cn_stride(11)
3888 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3889 }
3890
3891 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
3892 GemmMicrokernelTester()
3893 .mr(4)
3894 .nr(8)
3895 .kr(1)
3896 .sr(1)
3897 .m(4)
3898 .n(8)
3899 .k(4)
3900 .a_stride(7)
3901 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3902 }
3903
3904 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
3905 for (uint32_t n = 1; n <= 8; n++) {
3906 for (uint32_t m = 1; m <= 4; m++) {
3907 GemmMicrokernelTester()
3908 .mr(4)
3909 .nr(8)
3910 .kr(1)
3911 .sr(1)
3912 .m(m)
3913 .n(n)
3914 .k(4)
3915 .iterations(1)
3916 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3917 }
3918 }
3919 }
3920
3921 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
3922 for (uint32_t m = 1; m <= 4; m++) {
3923 GemmMicrokernelTester()
3924 .mr(4)
3925 .nr(8)
3926 .kr(1)
3927 .sr(1)
3928 .m(m)
3929 .n(8)
3930 .k(4)
3931 .iterations(1)
3932 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3933 }
3934 }
3935
3936 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
3937 for (uint32_t n = 1; n <= 8; n++) {
3938 GemmMicrokernelTester()
3939 .mr(4)
3940 .nr(8)
3941 .kr(1)
3942 .sr(1)
3943 .m(4)
3944 .n(n)
3945 .k(4)
3946 .iterations(1)
3947 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3948 }
3949 }
3950
3951 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
3952 for (size_t k = 1; k < 4; k++) {
3953 GemmMicrokernelTester()
3954 .mr(4)
3955 .nr(8)
3956 .kr(1)
3957 .sr(1)
3958 .m(4)
3959 .n(8)
3960 .k(k)
3961 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3962 }
3963 }
3964
3965 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
3966 for (size_t k = 1; k < 4; k++) {
3967 GemmMicrokernelTester()
3968 .mr(4)
3969 .nr(8)
3970 .kr(1)
3971 .sr(1)
3972 .m(4)
3973 .n(8)
3974 .k(k)
3975 .a_stride(7)
3976 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3977 }
3978 }
3979
3980 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
3981 for (size_t k = 1; k < 4; k++) {
3982 for (uint32_t n = 1; n <= 8; n++) {
3983 for (uint32_t m = 1; m <= 4; m++) {
3984 GemmMicrokernelTester()
3985 .mr(4)
3986 .nr(8)
3987 .kr(1)
3988 .sr(1)
3989 .m(m)
3990 .n(n)
3991 .k(k)
3992 .iterations(1)
3993 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
3994 }
3995 }
3996 }
3997 }
3998
3999 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
4000 for (size_t k = 5; k < 8; k++) {
4001 GemmMicrokernelTester()
4002 .mr(4)
4003 .nr(8)
4004 .kr(1)
4005 .sr(1)
4006 .m(4)
4007 .n(8)
4008 .k(k)
4009 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4010 }
4011 }
4012
4013 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
4014 for (size_t k = 5; k < 8; k++) {
4015 GemmMicrokernelTester()
4016 .mr(4)
4017 .nr(8)
4018 .kr(1)
4019 .sr(1)
4020 .m(4)
4021 .n(8)
4022 .k(k)
4023 .a_stride(11)
4024 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4025 }
4026 }
4027
4028 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
4029 for (size_t k = 5; k < 8; k++) {
4030 for (uint32_t n = 1; n <= 8; n++) {
4031 for (uint32_t m = 1; m <= 4; m++) {
4032 GemmMicrokernelTester()
4033 .mr(4)
4034 .nr(8)
4035 .kr(1)
4036 .sr(1)
4037 .m(m)
4038 .n(n)
4039 .k(k)
4040 .iterations(1)
4041 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4042 }
4043 }
4044 }
4045 }
4046
4047 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
4048 for (size_t k = 8; k <= 40; k += 4) {
4049 GemmMicrokernelTester()
4050 .mr(4)
4051 .nr(8)
4052 .kr(1)
4053 .sr(1)
4054 .m(4)
4055 .n(8)
4056 .k(k)
4057 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4058 }
4059 }
4060
4061 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
4062 for (size_t k = 8; k <= 40; k += 4) {
4063 GemmMicrokernelTester()
4064 .mr(4)
4065 .nr(8)
4066 .kr(1)
4067 .sr(1)
4068 .m(4)
4069 .n(8)
4070 .k(k)
4071 .a_stride(43)
4072 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4073 }
4074 }
4075
4076 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
4077 for (size_t k = 8; k <= 40; k += 4) {
4078 for (uint32_t n = 1; n <= 8; n++) {
4079 for (uint32_t m = 1; m <= 4; m++) {
4080 GemmMicrokernelTester()
4081 .mr(4)
4082 .nr(8)
4083 .kr(1)
4084 .sr(1)
4085 .m(m)
4086 .n(n)
4087 .k(k)
4088 .iterations(1)
4089 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4090 }
4091 }
4092 }
4093 }
4094
4095 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
4096 for (uint32_t n = 9; n < 16; n++) {
4097 for (size_t k = 1; k <= 20; k += 5) {
4098 GemmMicrokernelTester()
4099 .mr(4)
4100 .nr(8)
4101 .kr(1)
4102 .sr(1)
4103 .m(4)
4104 .n(n)
4105 .k(k)
4106 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4107 }
4108 }
4109 }
4110
4111 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
4112 for (uint32_t n = 9; n < 16; n++) {
4113 for (size_t k = 1; k <= 20; k += 5) {
4114 GemmMicrokernelTester()
4115 .mr(4)
4116 .nr(8)
4117 .kr(1)
4118 .sr(1)
4119 .m(4)
4120 .n(n)
4121 .k(k)
4122 .cn_stride(11)
4123 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4124 }
4125 }
4126 }
4127
4128 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
4129 for (uint32_t n = 9; n < 16; n++) {
4130 for (size_t k = 1; k <= 20; k += 5) {
4131 GemmMicrokernelTester()
4132 .mr(4)
4133 .nr(8)
4134 .kr(1)
4135 .sr(1)
4136 .m(4)
4137 .n(n)
4138 .k(k)
4139 .a_stride(23)
4140 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4141 }
4142 }
4143 }
4144
4145 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
4146 for (uint32_t n = 9; n < 16; n++) {
4147 for (size_t k = 1; k <= 20; k += 5) {
4148 for (uint32_t m = 1; m <= 4; m++) {
4149 GemmMicrokernelTester()
4150 .mr(4)
4151 .nr(8)
4152 .kr(1)
4153 .sr(1)
4154 .m(m)
4155 .n(n)
4156 .k(k)
4157 .iterations(1)
4158 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4159 }
4160 }
4161 }
4162 }
4163
4164 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
4165 for (uint32_t n = 16; n <= 24; n += 8) {
4166 for (size_t k = 1; k <= 20; k += 5) {
4167 GemmMicrokernelTester()
4168 .mr(4)
4169 .nr(8)
4170 .kr(1)
4171 .sr(1)
4172 .m(4)
4173 .n(n)
4174 .k(k)
4175 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4176 }
4177 }
4178 }
4179
4180 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
4181 for (uint32_t n = 16; n <= 24; n += 8) {
4182 for (size_t k = 1; k <= 20; k += 5) {
4183 GemmMicrokernelTester()
4184 .mr(4)
4185 .nr(8)
4186 .kr(1)
4187 .sr(1)
4188 .m(4)
4189 .n(n)
4190 .k(k)
4191 .cn_stride(11)
4192 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4193 }
4194 }
4195 }
4196
4197 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
4198 for (uint32_t n = 16; n <= 24; n += 8) {
4199 for (size_t k = 1; k <= 20; k += 5) {
4200 GemmMicrokernelTester()
4201 .mr(4)
4202 .nr(8)
4203 .kr(1)
4204 .sr(1)
4205 .m(4)
4206 .n(n)
4207 .k(k)
4208 .a_stride(23)
4209 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4210 }
4211 }
4212 }
4213
4214 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
4215 for (uint32_t n = 16; n <= 24; n += 8) {
4216 for (size_t k = 1; k <= 20; k += 5) {
4217 for (uint32_t m = 1; m <= 4; m++) {
4218 GemmMicrokernelTester()
4219 .mr(4)
4220 .nr(8)
4221 .kr(1)
4222 .sr(1)
4223 .m(m)
4224 .n(n)
4225 .k(k)
4226 .iterations(1)
4227 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4228 }
4229 }
4230 }
4231 }
4232
4233 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
4234 for (size_t k = 1; k <= 20; k += 5) {
4235 for (uint32_t n = 1; n <= 8; n++) {
4236 for (uint32_t m = 1; m <= 4; m++) {
4237 GemmMicrokernelTester()
4238 .mr(4)
4239 .nr(8)
4240 .kr(1)
4241 .sr(1)
4242 .m(m)
4243 .n(n)
4244 .k(k)
4245 .cm_stride(11)
4246 .iterations(1)
4247 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4248 }
4249 }
4250 }
4251 }
4252
4253 TEST(F32_GEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
4254 GemmMicrokernelTester()
4255 .mr(4)
4256 .nr(8)
4257 .kr(1)
4258 .sr(1)
4259 .m(4)
4260 .n(8)
4261 .k(4)
4262 .cm_stride(11)
4263 .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
4264 }
4265 #endif // XNN_ARCH_WASMRELAXEDSIMD
4266
4267
4268 #if XNN_ARCH_WASMRELAXEDSIMD
4269 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
4270 GemmMicrokernelTester()
4271 .mr(4)
4272 .nr(8)
4273 .kr(1)
4274 .sr(4)
4275 .m(4)
4276 .n(8)
4277 .k(4)
4278 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4279 }
4280
4281 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
4282 GemmMicrokernelTester()
4283 .mr(4)
4284 .nr(8)
4285 .kr(1)
4286 .sr(4)
4287 .m(4)
4288 .n(8)
4289 .k(4)
4290 .cn_stride(11)
4291 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4292 }
4293
4294 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
4295 GemmMicrokernelTester()
4296 .mr(4)
4297 .nr(8)
4298 .kr(1)
4299 .sr(4)
4300 .m(4)
4301 .n(8)
4302 .k(4)
4303 .a_stride(7)
4304 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4305 }
4306
4307 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
4308 for (uint32_t n = 1; n <= 8; n++) {
4309 for (uint32_t m = 1; m <= 4; m++) {
4310 GemmMicrokernelTester()
4311 .mr(4)
4312 .nr(8)
4313 .kr(1)
4314 .sr(4)
4315 .m(m)
4316 .n(n)
4317 .k(4)
4318 .iterations(1)
4319 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4320 }
4321 }
4322 }
4323
4324 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
4325 for (uint32_t m = 1; m <= 4; m++) {
4326 GemmMicrokernelTester()
4327 .mr(4)
4328 .nr(8)
4329 .kr(1)
4330 .sr(4)
4331 .m(m)
4332 .n(8)
4333 .k(4)
4334 .iterations(1)
4335 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4336 }
4337 }
4338
4339 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
4340 for (uint32_t n = 1; n <= 8; n++) {
4341 GemmMicrokernelTester()
4342 .mr(4)
4343 .nr(8)
4344 .kr(1)
4345 .sr(4)
4346 .m(4)
4347 .n(n)
4348 .k(4)
4349 .iterations(1)
4350 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4351 }
4352 }
4353
4354 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
4355 for (size_t k = 1; k < 4; k++) {
4356 GemmMicrokernelTester()
4357 .mr(4)
4358 .nr(8)
4359 .kr(1)
4360 .sr(4)
4361 .m(4)
4362 .n(8)
4363 .k(k)
4364 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4365 }
4366 }
4367
4368 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
4369 for (size_t k = 1; k < 4; k++) {
4370 GemmMicrokernelTester()
4371 .mr(4)
4372 .nr(8)
4373 .kr(1)
4374 .sr(4)
4375 .m(4)
4376 .n(8)
4377 .k(k)
4378 .a_stride(7)
4379 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4380 }
4381 }
4382
4383 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
4384 for (size_t k = 1; k < 4; k++) {
4385 for (uint32_t n = 1; n <= 8; n++) {
4386 for (uint32_t m = 1; m <= 4; m++) {
4387 GemmMicrokernelTester()
4388 .mr(4)
4389 .nr(8)
4390 .kr(1)
4391 .sr(4)
4392 .m(m)
4393 .n(n)
4394 .k(k)
4395 .iterations(1)
4396 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4397 }
4398 }
4399 }
4400 }
4401
4402 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
4403 for (size_t k = 5; k < 8; k++) {
4404 GemmMicrokernelTester()
4405 .mr(4)
4406 .nr(8)
4407 .kr(1)
4408 .sr(4)
4409 .m(4)
4410 .n(8)
4411 .k(k)
4412 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4413 }
4414 }
4415
4416 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
4417 for (size_t k = 5; k < 8; k++) {
4418 GemmMicrokernelTester()
4419 .mr(4)
4420 .nr(8)
4421 .kr(1)
4422 .sr(4)
4423 .m(4)
4424 .n(8)
4425 .k(k)
4426 .a_stride(11)
4427 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4428 }
4429 }
4430
4431 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
4432 for (size_t k = 5; k < 8; k++) {
4433 for (uint32_t n = 1; n <= 8; n++) {
4434 for (uint32_t m = 1; m <= 4; m++) {
4435 GemmMicrokernelTester()
4436 .mr(4)
4437 .nr(8)
4438 .kr(1)
4439 .sr(4)
4440 .m(m)
4441 .n(n)
4442 .k(k)
4443 .iterations(1)
4444 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4445 }
4446 }
4447 }
4448 }
4449
4450 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
4451 for (size_t k = 8; k <= 40; k += 4) {
4452 GemmMicrokernelTester()
4453 .mr(4)
4454 .nr(8)
4455 .kr(1)
4456 .sr(4)
4457 .m(4)
4458 .n(8)
4459 .k(k)
4460 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4461 }
4462 }
4463
4464 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
4465 for (size_t k = 8; k <= 40; k += 4) {
4466 GemmMicrokernelTester()
4467 .mr(4)
4468 .nr(8)
4469 .kr(1)
4470 .sr(4)
4471 .m(4)
4472 .n(8)
4473 .k(k)
4474 .a_stride(43)
4475 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4476 }
4477 }
4478
4479 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
4480 for (size_t k = 8; k <= 40; k += 4) {
4481 for (uint32_t n = 1; n <= 8; n++) {
4482 for (uint32_t m = 1; m <= 4; m++) {
4483 GemmMicrokernelTester()
4484 .mr(4)
4485 .nr(8)
4486 .kr(1)
4487 .sr(4)
4488 .m(m)
4489 .n(n)
4490 .k(k)
4491 .iterations(1)
4492 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4493 }
4494 }
4495 }
4496 }
4497
4498 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
4499 for (uint32_t n = 9; n < 16; n++) {
4500 for (size_t k = 1; k <= 20; k += 5) {
4501 GemmMicrokernelTester()
4502 .mr(4)
4503 .nr(8)
4504 .kr(1)
4505 .sr(4)
4506 .m(4)
4507 .n(n)
4508 .k(k)
4509 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4510 }
4511 }
4512 }
4513
4514 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
4515 for (uint32_t n = 9; n < 16; n++) {
4516 for (size_t k = 1; k <= 20; k += 5) {
4517 GemmMicrokernelTester()
4518 .mr(4)
4519 .nr(8)
4520 .kr(1)
4521 .sr(4)
4522 .m(4)
4523 .n(n)
4524 .k(k)
4525 .cn_stride(11)
4526 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4527 }
4528 }
4529 }
4530
4531 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
4532 for (uint32_t n = 9; n < 16; n++) {
4533 for (size_t k = 1; k <= 20; k += 5) {
4534 GemmMicrokernelTester()
4535 .mr(4)
4536 .nr(8)
4537 .kr(1)
4538 .sr(4)
4539 .m(4)
4540 .n(n)
4541 .k(k)
4542 .a_stride(23)
4543 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4544 }
4545 }
4546 }
4547
4548 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
4549 for (uint32_t n = 9; n < 16; n++) {
4550 for (size_t k = 1; k <= 20; k += 5) {
4551 for (uint32_t m = 1; m <= 4; m++) {
4552 GemmMicrokernelTester()
4553 .mr(4)
4554 .nr(8)
4555 .kr(1)
4556 .sr(4)
4557 .m(m)
4558 .n(n)
4559 .k(k)
4560 .iterations(1)
4561 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4562 }
4563 }
4564 }
4565 }
4566
TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA,n_div_8)4567 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
4568 for (uint32_t n = 16; n <= 24; n += 8) {
4569 for (size_t k = 1; k <= 20; k += 5) {
4570 GemmMicrokernelTester()
4571 .mr(4)
4572 .nr(8)
4573 .kr(1)
4574 .sr(4)
4575 .m(4)
4576 .n(n)
4577 .k(k)
4578 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4579 }
4580 }
4581 }
4582
TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA,n_div_8_strided_cn)4583 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
4584 for (uint32_t n = 16; n <= 24; n += 8) {
4585 for (size_t k = 1; k <= 20; k += 5) {
4586 GemmMicrokernelTester()
4587 .mr(4)
4588 .nr(8)
4589 .kr(1)
4590 .sr(4)
4591 .m(4)
4592 .n(n)
4593 .k(k)
4594 .cn_stride(11)
4595 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4596 }
4597 }
4598 }
4599
TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA,n_div_8_strided_a)4600 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
4601 for (uint32_t n = 16; n <= 24; n += 8) {
4602 for (size_t k = 1; k <= 20; k += 5) {
4603 GemmMicrokernelTester()
4604 .mr(4)
4605 .nr(8)
4606 .kr(1)
4607 .sr(4)
4608 .m(4)
4609 .n(n)
4610 .k(k)
4611 .a_stride(23)
4612 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4613 }
4614 }
4615 }
4616
TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA,n_div_8_subtile)4617 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
4618 for (uint32_t n = 16; n <= 24; n += 8) {
4619 for (size_t k = 1; k <= 20; k += 5) {
4620 for (uint32_t m = 1; m <= 4; m++) {
4621 GemmMicrokernelTester()
4622 .mr(4)
4623 .nr(8)
4624 .kr(1)
4625 .sr(4)
4626 .m(m)
4627 .n(n)
4628 .k(k)
4629 .iterations(1)
4630 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4631 }
4632 }
4633 }
4634 }
4635
TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA,strided_cm_subtile)4636 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
4637 for (size_t k = 1; k <= 20; k += 5) {
4638 for (uint32_t n = 1; n <= 8; n++) {
4639 for (uint32_t m = 1; m <= 4; m++) {
4640 GemmMicrokernelTester()
4641 .mr(4)
4642 .nr(8)
4643 .kr(1)
4644 .sr(4)
4645 .m(m)
4646 .n(n)
4647 .k(k)
4648 .cm_stride(11)
4649 .iterations(1)
4650 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4651 }
4652 }
4653 }
4654 }
4655
TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA,strided_cm)4656 TEST(F32_GEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
4657 GemmMicrokernelTester()
4658 .mr(4)
4659 .nr(8)
4660 .kr(1)
4661 .sr(4)
4662 .m(4)
4663 .n(8)
4664 .k(4)
4665 .cm_stride(11)
4666 .Test(xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
4667 }
4668 #endif // XNN_ARCH_WASMRELAXEDSIMD
4669
4670
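// Coverage note (editorial): the next group exercises the 5x8 wasmrelaxedsimd_fma_splat
// GEMM+ReLU microkernel across the k == 4 reference case, k < 4, k > 4, k % 4 == 0,
// n > 8, n % 8 == 0, strided A/C pointers, and single-iteration subtile shapes.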
#if XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(4)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
    for (uint32_t m = 1; m <= 5; m++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
    for (size_t k = 1; k < 4; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
    for (size_t k = 5; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
    for (size_t k = 8; k <= 40; k += 4) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
#endif  // XNN_ARCH_WASMRELAXEDSIMD


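// Coverage note (editorial): same case grid as the 5x8 splat variant above, now for
// the 6x8 wasmrelaxedsimd_fma_splat GEMM+ReLU microkernel (m subtiles up to 6 rows).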
#if XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(4)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
    for (size_t k = 1; k < 4; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
    for (size_t k = 5; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
    for (size_t k = 8; k <= 40; k += 4) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(6)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
#endif  // XNN_ARCH_WASMRELAXEDSIMD


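// Coverage note (editorial): the next group exercises the portable 1x4 WAsm GEMM+ReLU
// microkernel over k == 1, k > 1, n > 4, n % 4 == 0, strided A/C pointers, and subtiles.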
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(1)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
  }

  TEST(F32_GEMM_RELU_1X4__WASM, strided_cn) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(1)
      .cn_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
  }

  TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1_strided_a) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
  }

  TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1_subtile) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1_subtile_m) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(4)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1_subtile_n) {
    for (uint32_t n = 1; n <= 4; n++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, k_gt_1) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, k_gt_1_strided_a) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, k_gt_1_subtile) {
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 4; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, n_gt_4) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, n_gt_4_strided_cn) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, n_gt_4_strided_a) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, n_gt_4_subtile) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, n_div_4) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, n_div_4_strided_cn) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, n_div_4_strided_a) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, n_div_4_subtile) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, strided_cm_subtile) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 4; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
        }
      }
    }
  }

  TEST(F32_GEMM_RELU_1X4__WASM, strided_cm) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(1)
      .cm_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


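// Coverage note (editorial): the remaining groups test the architecture-independent
// scalar GEMM+ReLU microkernels (1x4, 4x2, 4x4) with the same k/n/stride/subtile grid.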
TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
}

TEST(F32_GEMM_RELU_1X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
}

TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
}

TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_1X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
}


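// Coverage note (editorial): 4x2 scalar GEMM+ReLU microkernel; the n edge cases shrink
// to n > 2 and n % 2 == 0 to match its nr of 2, with m subtiles up to 4 rows.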
TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
}

TEST(F32_GEMM_RELU_4X2__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
}

TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
}

TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, n_gt_2_strided_a) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, n_div_2_strided_a) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_4X2__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
}


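// Coverage note (editorial): 4x4 scalar GEMM+ReLU microkernel; same case grid as the
// 1x4 scalar group, extended to m subtiles of up to 4 rows.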
TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
}

TEST(F32_GEMM_RELU_4X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
}

TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
}

TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}

TEST(F32_GEMM_RELU_4X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
}
