1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/f32-spmm-minmax.yaml
8 // Generator: tools/generate-spmm-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/spmm.h>
17 #include "spmm-microkernel-tester.h"
18
19
20 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single invocation of the 4x1 NEON kernel: m=4, n=1, k=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(1).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
}
32
// Sweeps k over 2..9 with m=4, n=1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 2; kk < 10; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
  }
}
46
// Sweeps n over 2..9 and k over {1, 3, 5} with m=4 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 2; nn < 10; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(4).n(nn).k(kk).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
62
// Sweeps m over 1..3 (below mr=4), n over {1, 3, 5, 7, 9} and k over {1, 3, 5};
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, m_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm < 4; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
80
// Sweeps m over {8, 12} (multiples of mr=4), n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, m_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 8; mm <= 12; mm += 4) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
98
// Sweeps m over 5..7 (above mr=4, not a multiple of it), n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, m_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 5; mm < 8; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
116
// Uses output_stride(11) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).output_stride(11).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
133
// Sets qmin(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
150
// Sets qmax(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
167
// Sparsity 0.5 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__NEON, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
183
// Sparsity 1.0 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__NEON, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
199 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
200
201
202 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single invocation of the pipelined 4x1 NEON kernel: m=4, n=1, k=1,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(1).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
}
214
// Sweeps k over 2..9 with m=4, n=1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 2; kk < 10; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
  }
}
228
// Sweeps n over 2..9 and k over {1, 3, 5} with m=4 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 2; nn < 10; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(4).n(nn).k(kk).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
244
// Sweeps m over 1..3 (below mr=4), n over {1, 3, 5, 7, 9} and k over {1, 3, 5};
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, m_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm < 4; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
262
// Sweeps m over {8, 12} (multiples of mr=4), n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, m_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 8; mm <= 12; mm += 4) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
280
// Sweeps m over 5..7 (above mr=4, not a multiple of it), n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, m_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 5; mm < 8; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
298
// Uses output_stride(11) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).output_stride(11).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
315
// Sets qmin(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
332
// Sets qmax(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
349
// Sparsity 0.5 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
365
// Sparsity 1.0 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
381 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
382
383
384 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single invocation of the 4x1 NEON x2 kernel: m=4, n=1, k=2, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(2).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
}
396
// Covers k below 2; the loop bounds admit only k=1. m=4, n=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 2; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
410
// Covers k strictly between 2 and 4; the loop bounds admit only k=3.
// m=4, n=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 3; kk < 4; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
424
// Sweeps even k from 4 through 20 with m=4, n=1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 4; kk <= 20; kk += 2) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
438
// Sweeps n over 2..9 and k over {1, 4, 7, 10} with m=4 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 2; nn < 10; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(4).n(nn).k(kk).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
454
// Sweeps m over 1..3 (below mr=4), n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, m_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm < 4; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 10; kk += 3) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
472
// Sweeps m over {8, 12} (multiples of mr=4), n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, m_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 8; mm <= 12; mm += 4) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 10; kk += 3) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
490
// Sweeps m over 5..7 (above mr=4, not a multiple of it), n over
// {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, m_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 5; mm < 8; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 10; kk += 3) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
508
// Uses output_stride(11) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).output_stride(11).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
525
// Sets qmin(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
542
// Sets qmax(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
559
// Sparsity 0.5 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
575
// Sparsity 1.0 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_4X1__NEON_X2, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
591 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
592
593
594 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single invocation of the 4x1 NEON-FMA kernel: m=4, n=1, k=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(1).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
}
606
// Sweeps k over 2..9 with m=4, n=1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 2; kk < 10; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
620
// Sweeps n over 2..9 and k over {1, 3, 5} with m=4 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 2; nn < 10; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(4).n(nn).k(kk).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
636
// Sweeps m over 1..3 (below mr=4), n over {1, 3, 5, 7, 9} and k over {1, 3, 5};
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, m_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 1; mm < 4; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
654
// Sweeps m over {8, 12} (multiples of mr=4), n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, m_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 8; mm <= 12; mm += 4) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
672
// Sweeps m over 5..7 (above mr=4, not a multiple of it), n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, m_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 5; mm < 8; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
690
// Uses output_stride(11) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).output_stride(11).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
707
// Sets qmin(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
724
// Sets qmax(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
741
// Sparsity 0.5 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
757
// Sparsity 1.0 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
773 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
774
775
776 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single invocation of the pipelined 4x1 NEON-FMA kernel: m=4, n=1, k=1,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(1).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
}
788
// Sweeps k over 2..9 with m=4, n=1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 2; kk < 10; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
  }
}
802
// Sweeps n over 2..9 and k over {1, 3, 5} with m=4 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 2; nn < 10; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(4).n(nn).k(kk).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
818
// Sweeps m over 1..3 (below mr=4), n over {1, 3, 5, 7, 9} and k over {1, 3, 5};
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, m_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 1; mm < 4; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
836
// Sweeps m over {8, 12} (multiples of mr=4), n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, m_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 8; mm <= 12; mm += 4) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
854
// Sweeps m over 5..7 (above mr=4, not a multiple of it), n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, m_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 5; mm < 8; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 5; kk += 2) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
872
// Uses output_stride(11) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).output_stride(11).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
889
// Sets qmin(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
906
// Sets qmax(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
923
// Sparsity 0.5 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
939
// Sparsity 1.0 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
955 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
956
957
958 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single invocation of the 4x1 NEON-FMA x2 kernel: m=4, n=1, k=2,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(2).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
}
970
// Covers k below 2; the loop bounds admit only k=1. m=4, n=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 2; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
984
// Covers k strictly between 2 and 4; the loop bounds admit only k=3.
// m=4, n=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 3; kk < 4; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
998
// Sweeps even k from 4 through 20 with m=4, n=1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 4; kk <= 20; kk += 2) {
    SpMMMicrokernelTester().mr(4).nr(1).m(4).n(1).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
1012
// Sweeps n over 2..9 and k over {1, 4, 7, 10} with m=4 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 2; nn < 10; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(4).n(nn).k(kk).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1028
// Sweeps m over 1..3 (below mr=4), n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, m_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 1; mm < 4; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 10; kk += 3) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1046
// Sweeps m over {8, 12} (multiples of mr=4), n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, m_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 8; mm <= 12; mm += 4) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 10; kk += 3) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1064
// Sweeps m over 5..7 (above mr=4, not a multiple of it), n over
// {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, m_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 5; mm < 8; ++mm) {
    for (uint32_t nn = 1; nn < 10; nn += 2) {
      for (size_t kk = 1; kk <= 10; kk += 3) {
        SpMMMicrokernelTester().mr(4).nr(1).m(mm).n(nn).k(kk).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1082
// Uses output_stride(11) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).output_stride(11).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1099
// Sets qmin(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1116
// Sets qmax(128) with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}; sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1133
// Sparsity 0.5 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1149
// Sparsity 1.0 with m=8 while sweeping n over {1, 3, 5, 7, 9} and
// k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn < 10; nn += 2) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      SpMMMicrokernelTester().mr(4).nr(1).m(8).n(nn).k(kk).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1165 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1166
1167
1168 #if XNN_ARCH_ARM64
// Single invocation of the 4x2 NEON-FMA kernel: m=4, n=2, k=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester().mr(4).nr(2).m(4).n(2).k(1).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
}
1180
// k=1 with n swept over 1..2 (up to nr=2); m=4, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 2; ++nn) {
    SpMMMicrokernelTester().mr(4).nr(2).m(4).n(nn).k(1).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
1194
// Sweeps k over 2..9 with m=4, n=2 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 2; kk < 10; ++kk) {
    SpMMMicrokernelTester().mr(4).nr(2).m(4).n(2).k(kk).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
1208
// k_gt_1_subtile: k from 2 through 9 (outer), n in {1, 2} (inner), m = 4, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 3; ++n_dim) {
      SpMMMicrokernelTester().mr(4).nr(2).m(4).n(n_dim).k(k_dim).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1224
// n_gt_2: n from 3 through 9, k in {1, 3, 5}, m = 4, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 3; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(2).m(4).n(n_dim).k(k_dim).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1240
// n_div_2: n in {4, 6}, k in {1, 3, 5}, m = 4.
// NOTE(review): no explicit .sparsity(...) is set in this case, so whatever
// default SpMMMicrokernelTester uses applies -- confirm this is intended.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(2).m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1255
// m_lt_4: m in {1, 2, 3}, n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, m_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 3) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(4).nr(2).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1273
// m_div_4: m in {8, 12}, n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, m_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 8; m_dim < 13; m_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 3) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(4).nr(2).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1291
// m_gt_4: m in {5, 6, 7}, n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, m_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 5; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 3) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(4).nr(2).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1309
// output_stride: output_stride = 11 with m = 8, n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(2).m(8).n(n_dim).k(k_dim).output_stride(11).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1326
// qmin: qmin = 128 with m = 8, n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(2).m(8).n(n_dim).k(k_dim).sparsity(0.0f).qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1343
// qmax: qmax = 128 with m = 8, n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(2).m(8).n(n_dim).k(k_dim).sparsity(0.0f).qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1360
// half_sparse: sparsity 0.5f, m = 8, n in {1, 4, 7}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(2).m(8).n(n_dim).k(k_dim).sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1376
// zero_weights: sparsity 1.0f, m = 8, n in {1, 4, 7}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X2__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 3) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(2).m(8).n(n_dim).k(k_dim).sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1392 #endif // XNN_ARCH_ARM64
1393
1394
1395 #if XNN_ARCH_ARM64
// k_eq_1: a single run with m = 4, n = 4, k = 1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester().mr(4).nr(4).m(4).n(4).k(1).sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
}
1407
// k_eq_1_subtile: k = 1, m = 4, n in {1, 2, 3, 4}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    SpMMMicrokernelTester().mr(4).nr(4).m(4).n(n_dim).k(1).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
1421
// k_gt_1: k from 2 through 9, m = 4, n = 4, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester().mr(4).nr(4).m(4).n(4).k(k_dim).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
1435
// k_gt_1_subtile: k from 2 through 9 (outer), n in {1, 2, 3, 4} (inner), m = 4, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      SpMMMicrokernelTester().mr(4).nr(4).m(4).n(n_dim).k(k_dim).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1451
// n_gt_4: n from 5 through 9, k in {1, 3, 5}, m = 4, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, n_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 5; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(4).m(4).n(n_dim).k(k_dim).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1467
// n_div_4: n in {8, 12}, k in {1, 3, 5}, m = 4.
// NOTE(review): no explicit .sparsity(...) is set in this case, so whatever
// default SpMMMicrokernelTester uses applies -- confirm this is intended.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, n_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(4).m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1482
// m_lt_4: m in {1, 2, 3}, n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, m_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 19; n_dim += 5) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(4).nr(4).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1500
// m_div_4: m in {8, 12}, n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, m_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 8; m_dim < 13; m_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 19; n_dim += 5) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(4).nr(4).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1518
// m_gt_4: m in {5, 6, 7}, n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, m_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 5; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 19; n_dim += 5) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(4).nr(4).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1536
// output_stride: output_stride = 11 with m = 8, n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 19; n_dim += 5) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(4).m(8).n(n_dim).k(k_dim).output_stride(11).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1553
// qmin: qmin = 128 with m = 8, n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 19; n_dim += 5) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(4).m(8).n(n_dim).k(k_dim).sparsity(0.0f).qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1570
// qmax: qmax = 128 with m = 8, n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 19; n_dim += 5) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(4).m(8).n(n_dim).k(k_dim).sparsity(0.0f).qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1587
// half_sparse: sparsity 0.5f, m = 8, n in {1, 6, 11, 16}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 19; n_dim += 5) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(4).m(8).n(n_dim).k(k_dim).sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1603
// zero_weights: sparsity 1.0f, m = 8, n in {1, 6, 11, 16}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X4__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 19; n_dim += 5) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(4).nr(4).m(8).n(n_dim).k(k_dim).sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1619 #endif // XNN_ARCH_ARM64
1620
1621
1622 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_1: a single run with m = 8, n = 1, k = 1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(1).sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
}
1634
// k_gt_1: k from 2 through 9, m = 8, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(k_dim).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
  }
}
1648
// n_gt_1: n from 2 through 9, k in {1, 3, 5}, m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(8).n(n_dim).k(k_dim).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1664
// m_lt_8: m from 1 through 7, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, m_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1682
// m_div_8: m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, m_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 16; m_dim < 25; m_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1700
// m_gt_8: m from 9 through 15, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, m_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 9; m_dim <= 15; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1718
// output_stride: output_stride = 19 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).output_stride(19).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1735
// qmin: qmin = 128 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.0f).qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1752
// qmax: qmax = 128 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.0f).qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1769
// half_sparse: sparsity 0.5f, m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__NEON, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1785
// zero_weights: sparsity 1.0f, m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__NEON, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1801 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1802
1803
1804 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_1: a single run with m = 8, n = 1, k = 1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(1).sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
}
1816
// k_gt_1: k from 2 through 9, m = 8, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(k_dim).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
  }
}
1830
// n_gt_1: n from 2 through 9, k in {1, 3, 5}, m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(8).n(n_dim).k(k_dim).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1846
// m_lt_8: m from 1 through 7, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, m_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1864
// m_div_8: m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, m_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 16; m_dim < 25; m_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1882
// m_gt_8: m from 9 through 15, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, m_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 9; m_dim <= 15; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1900
// output_stride: output_stride = 19 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).output_stride(19).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1917
// qmin: qmin = 128 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.0f).qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1934
// qmax: qmax = 128 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.0f).qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1951
// half_sparse: sparsity 0.5f, m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1967
// zero_weights: sparsity 1.0f, m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1983 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1984
1985
1986 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_2: a single run with m = 8, n = 1, k = 2 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(2).sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
}
1998
// k_lt_2: the loop covers only k = 1; m = 8, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 1; ++k_dim) {
    SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(k_dim).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
2012
// k_gt_2: the loop covers only k = 3; m = 8, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim <= 3; ++k_dim) {
    SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(k_dim).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
2026
// k_div_2: even k from 4 through 20, m = 8, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim < 21; k_dim += 2) {
    SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(k_dim).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
2040
// n_gt_1: n from 2 through 9, k in {1, 4, 7, 10}, m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester().mr(8).nr(1).m(8).n(n_dim).k(k_dim).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2056
// m_lt_8: m from 1 through 7, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, m_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2074
// m_div_8: m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, m_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 16; m_dim < 25; m_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2092
// m_gt_8: m from 9 through 15, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, m_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 9; m_dim <= 15; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2110
// output_stride: output_stride = 19 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).output_stride(19).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2127
// qmin: qmin = 128 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.0f).qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2144
// qmax: qmax = 128 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.0f).qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2161
// half_sparse: sparsity 0.5f, m = 16, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2177
// zero_weights: sparsity 1.0f, m = 16, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_8X1__NEON_X2, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2193 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2194
2195
2196 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_1: a single run with m = 8, n = 1, k = 1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(1).sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
}
2208
// k_gt_1: k from 2 through 9, m = 8, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(k_dim).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
2222
// n_gt_1: n from 2 through 9, k in {1, 3, 5}, m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(8).n(n_dim).k(k_dim).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2238
// m_lt_8: m from 1 through 7, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, m_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2256
// m_div_8: m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, m_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 16; m_dim < 25; m_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2274
// m_gt_8: m from 9 through 15, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, m_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 9; m_dim <= 15; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester().mr(8).nr(1).m(m_dim).n(n_dim).k(k_dim).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2292
// output_stride: output_stride = 19 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).output_stride(19).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2309
// qmin: qmin = 128 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.0f).qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2326
// qmax: qmax = 128 with m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.0f).qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2343
// half_sparse: sparsity 0.5f, m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2359
// zero_weights: sparsity 1.0f, m = 16, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester().mr(8).nr(1).m(16).n(n_dim).k(k_dim).sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2375 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2376
2377
2378 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_1: a single run with m = 8, n = 1, k = 1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester().mr(8).nr(1).m(8).n(1).k(1).sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
}
2390
// Sweeps k over 2..9 for the pipelined 8x1 NEON FMA kernel at
// m = 8, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(1).k(cur_k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
              xnn_init_f32_minmax_scalar_params);
  }
}
2404
// Sweeps n over 2..9 and odd k in [1, 5] for the pipelined 8x1 NEON FMA
// kernel at m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 2; cur_n < 10; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(8).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2420
// Sweeps m over 1..7 (below mr = 8), odd n in [1, 9] and odd k in [1, 5]
// for the pipelined 8x1 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, m_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m < 8; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2438
// Uses m = 16 and m = 24 (multiples of mr = 8), odd n in [1, 9] and odd k
// in [1, 5] for the pipelined 8x1 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, m_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 16; cur_m <= 24; cur_m += 8) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2456
// Sweeps m over 9..15 (above mr = 8), odd n in [1, 9] and odd k in [1, 5]
// for the pipelined 8x1 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, m_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 9; cur_m < 16; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2474
// Runs the pipelined 8x1 NEON FMA kernel with output_stride 19 at m = 16
// and sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2491
// Runs the pipelined 8x1 NEON FMA kernel with qmin set to 128 at m = 16
// and sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2508
// Runs the pipelined 8x1 NEON FMA kernel with qmax set to 128 at m = 16
// and sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2525
// Runs the pipelined 8x1 NEON FMA kernel with sparsity 0.5f at m = 16,
// sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2541
// Runs the pipelined 8x1 NEON FMA kernel with sparsity 1.0f at m = 16,
// sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2557 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2558
2559
2560 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration for the x2 variant of the 8x1 NEON FMA kernel:
// m = 8, n = 1, k = 2, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
            xnn_init_f32_minmax_scalar_params);
}
2572
// Covers k below 2 for the x2 variant of the 8x1 NEON FMA kernel at
// m = 8, n = 1, sparsity 0.0f. The loop body executes once, with k = 1.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 2; ++cur_k) {
    SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(1).k(cur_k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
              xnn_init_f32_minmax_scalar_params);
  }
}
2586
// Covers k just above 2 for the x2 variant of the 8x1 NEON FMA kernel at
// m = 8, n = 1, sparsity 0.0f. The loop body executes once, with k = 3.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 3; cur_k < 4; ++cur_k) {
    SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(1).k(cur_k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
              xnn_init_f32_minmax_scalar_params);
  }
}
2600
// Sweeps even k from 4 through 20 for the x2 variant of the 8x1 NEON FMA
// kernel at m = 8, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 4; cur_k <= 20; cur_k += 2) {
    SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(1).k(cur_k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
              xnn_init_f32_minmax_scalar_params);
  }
}
2614
// Sweeps n over 2..9 and k over {1, 4, 7, 10} for the x2 variant of the
// 8x1 NEON FMA kernel at m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 2; cur_n < 10; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(8).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2630
// Sweeps m over 1..7 (below mr = 8), odd n in [1, 9] and k over
// {1, 4, 7, 10} for the x2 variant of the 8x1 NEON FMA kernel with
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, m_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m < 8; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2648
// Uses m = 16 and m = 24 (multiples of mr = 8), odd n in [1, 9] and k over
// {1, 4, 7, 10} for the x2 variant of the 8x1 NEON FMA kernel with
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, m_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 16; cur_m <= 24; cur_m += 8) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2666
// Sweeps m over 9..15 (above mr = 8), odd n in [1, 9] and k over
// {1, 4, 7, 10} for the x2 variant of the 8x1 NEON FMA kernel with
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, m_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 9; cur_m < 16; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2684
// Runs the x2 variant of the 8x1 NEON FMA kernel with output_stride 19 at
// m = 16 and sparsity 0.0f, sweeping odd n in [1, 9] and k over
// {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2701
// Runs the x2 variant of the 8x1 NEON FMA kernel with qmin set to 128 at
// m = 16 and sparsity 0.0f, sweeping odd n in [1, 9] and k over
// {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2718
// Runs the x2 variant of the 8x1 NEON FMA kernel with qmax set to 128 at
// m = 16 and sparsity 0.0f, sweeping odd n in [1, 9] and k over
// {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2735
// Runs the x2 variant of the 8x1 NEON FMA kernel with sparsity 0.5f at
// m = 16, sweeping odd n in [1, 9] and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2751
// Runs the x2 variant of the 8x1 NEON FMA kernel with sparsity 1.0f at
// m = 16, sweeping odd n in [1, 9] and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 10; cur_k += 3) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2767 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2768
2769
2770 #if XNN_ARCH_ARM64
// Single configuration for the 8x2 NEON FMA kernel:
// m = 8, n = 2, k = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
      .mr(8).nr(2)
      .m(8).n(2).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
2782
// Sweeps n over 1..2 (up to nr = 2) for the 8x2 NEON FMA kernel at
// m = 8, k = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    SpMMMicrokernelTester()
        .mr(8).nr(2)
        .m(8).n(cur_n).k(1)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
2796
// Sweeps k over 2..9 for the 8x2 NEON FMA kernel at m = 8, n = 2,
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    SpMMMicrokernelTester()
        .mr(8).nr(2)
        .m(8).n(2).k(cur_k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
2810
// Sweeps k over 2..9 and, for each k, n over 1..2 (up to nr = 2) for the
// 8x2 NEON FMA kernel at m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(8).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2826
// Sweeps n over 3..9 (above nr = 2) and odd k in [1, 5] for the 8x2
// NEON FMA kernel at m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 3; cur_n < 10; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(8).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2842
// Uses n = 4 and n = 6 (multiples of nr = 2) and odd k in [1, 5] for the
// 8x2 NEON FMA kernel at m = 8.
// NOTE(review): unlike the other tests in this suite, sparsity is not set
// here, so whatever default SpMMMicrokernelTester uses applies -- confirm
// that this is intended.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(8).n(cur_n).k(cur_k)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2857
// Sweeps m over 1..7 (below mr = 8), n over {1, 4, 7} and odd k in [1, 5]
// for the 8x2 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, m_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m < 8; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 3) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2875
// Uses m = 16 and m = 24 (multiples of mr = 8), n over {1, 4, 7} and odd k
// in [1, 5] for the 8x2 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, m_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 16; cur_m <= 24; cur_m += 8) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 3) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2893
// Sweeps m over 9..15 (above mr = 8), n over {1, 4, 7} and odd k in [1, 5]
// for the 8x2 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, m_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 9; cur_m < 16; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 3) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2911
// Runs the 8x2 NEON FMA kernel with output_stride 19 at m = 16 and
// sparsity 0.0f, sweeping n over {1, 4, 7} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 3) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(cur_n).k(cur_k)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2928
// Runs the 8x2 NEON FMA kernel with qmin set to 128 at m = 16 and
// sparsity 0.0f, sweeping n over {1, 4, 7} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 3) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2945
// Runs the 8x2 NEON FMA kernel with qmax set to 128 at m = 16 and
// sparsity 0.0f, sweeping n over {1, 4, 7} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 3) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2962
// Runs the 8x2 NEON FMA kernel with sparsity 0.5f at m = 16,
// sweeping n over {1, 4, 7} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 3) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2978
// Runs the 8x2 NEON FMA kernel with sparsity 1.0f at m = 16,
// sweeping n over {1, 4, 7} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X2__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 3) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2994 #endif // XNN_ARCH_ARM64
2995
2996
2997 #if XNN_ARCH_ARM64
// Single configuration for the 8x4 NEON FMA kernel:
// m = 8, n = 4, k = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
      .mr(8).nr(4)
      .m(8).n(4).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
3009
// Sweeps n over 1..4 (up to nr = 4) for the 8x4 NEON FMA kernel at
// m = 8, k = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    SpMMMicrokernelTester()
        .mr(8).nr(4)
        .m(8).n(cur_n).k(1)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
3023
// Sweeps k over 2..9 for the 8x4 NEON FMA kernel at m = 8, n = 4,
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    SpMMMicrokernelTester()
        .mr(8).nr(4)
        .m(8).n(4).k(cur_k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
3037
// Sweeps k over 2..9 and, for each k, n over 1..4 (up to nr = 4) for the
// 8x4 NEON FMA kernel at m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(8).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3053
// Sweeps n over 5..9 (above nr = 4) and odd k in [1, 5] for the 8x4
// NEON FMA kernel at m = 8, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, n_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 5; cur_n < 10; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(8).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3069
// Uses n = 8 and n = 12 (multiples of nr = 4) and odd k in [1, 5] for the
// 8x4 NEON FMA kernel at m = 8.
// NOTE(review): unlike the other tests in this suite, sparsity is not set
// here, so whatever default SpMMMicrokernelTester uses applies -- confirm
// that this is intended.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, n_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(8).n(cur_n).k(cur_k)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3084
// Sweeps m over 1..7 (below mr = 8), n over {1, 6, 11, 16} and odd k in
// [1, 5] for the 8x4 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, m_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m < 8; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 20; cur_n += 5) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3102
// Uses m = 16 and m = 24 (multiples of mr = 8), n over {1, 6, 11, 16} and
// odd k in [1, 5] for the 8x4 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, m_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 16; cur_m <= 24; cur_m += 8) {
    for (uint32_t cur_n = 1; cur_n < 20; cur_n += 5) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3120
// Sweeps m over 9..15 (above mr = 8), n over {1, 6, 11, 16} and odd k in
// [1, 5] for the 8x4 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, m_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 9; cur_m < 16; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 20; cur_n += 5) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3138
// Runs the 8x4 NEON FMA kernel with output_stride 19 at m = 16 and
// sparsity 0.0f, sweeping n over {1, 6, 11, 16} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 20; cur_n += 5) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(cur_n).k(cur_k)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3155
// Runs the 8x4 NEON FMA kernel with qmin set to 128 at m = 16 and
// sparsity 0.0f, sweeping n over {1, 6, 11, 16} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 20; cur_n += 5) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3172
// Runs the 8x4 NEON FMA kernel with qmax set to 128 at m = 16 and
// sparsity 0.0f, sweeping n over {1, 6, 11, 16} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 20; cur_n += 5) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3189
// Runs the 8x4 NEON FMA kernel with sparsity 0.5f at m = 16,
// sweeping n over {1, 6, 11, 16} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 20; cur_n += 5) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3205
// Runs the 8x4 NEON FMA kernel with sparsity 1.0f at m = 16,
// sweeping n over {1, 6, 11, 16} and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_8X4__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 20; cur_n += 5) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(cur_n).k(cur_k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3221 #endif // XNN_ARCH_ARM64
3222
3223
3224 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration for the 12x1 NEON kernel:
// m = 12, n = 1, k = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester()
      .mr(12).nr(1)
      .m(12).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
            xnn_init_f32_minmax_scalar_params);
}
3236
// Sweeps k over 2..9 for the 12x1 NEON kernel at m = 12, n = 1,
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    SpMMMicrokernelTester()
        .mr(12).nr(1)
        .m(12).n(1).k(cur_k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
3250
// Sweeps n over 2..9 and odd k in [1, 5] for the 12x1 NEON kernel at
// m = 12, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEON, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 2; cur_n < 10; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(12).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3266
// Sweeps m over 1..11 (below mr = 12), odd n in [1, 9] and odd k in [1, 5]
// for the 12x1 NEON kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEON, m_lt_12) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m < 12; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3284
// Uses m = 24 and m = 36 (multiples of mr = 12), odd n in [1, 9] and odd k
// in [1, 5] for the 12x1 NEON kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEON, m_div_12) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 24; cur_m <= 36; cur_m += 12) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3302
// Sweeps m over 13..23 (above mr = 12), odd n in [1, 9] and odd k in
// [1, 5] for the 12x1 NEON kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEON, m_gt_12) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 13; cur_m < 24; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3320
// Runs the 12x1 NEON kernel with output_stride 29 at m = 24 and
// sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEON, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .output_stride(29)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3337
// Runs the 12x1 NEON kernel with qmin set to 128 at m = 24 and
// sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3354
// Runs the 12x1 NEON kernel with qmax set to 128 at m = 24 and
// sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3371
// Runs the 12x1 NEON kernel with sparsity 0.5f at m = 24,
// sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEON, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3387
// Runs the 12x1 NEON kernel with sparsity 1.0f at m = 24,
// sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEON, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3403 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3404
3405
3406 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration for the 12x1 NEON FMA kernel:
// m = 12, n = 1, k = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
      .mr(12).nr(1)
      .m(12).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
3418
// Sweeps k over 2..9 for the 12x1 NEON FMA kernel at m = 12, n = 1,
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    SpMMMicrokernelTester()
        .mr(12).nr(1)
        .m(12).n(1).k(cur_k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
3432
// Sweeps n over 2..9 and odd k in [1, 5] for the 12x1 NEON FMA kernel at
// m = 12, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 2; cur_n < 10; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(12).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3448
// Sweeps m over 1..11 (below mr = 12), odd n in [1, 9] and odd k in [1, 5]
// for the 12x1 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, m_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m < 12; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3466
// Uses m = 24 and m = 36 (multiples of mr = 12), odd n in [1, 9] and odd k
// in [1, 5] for the 12x1 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, m_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 24; cur_m <= 36; cur_m += 12) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3484
// Sweeps m over 13..23 (above mr = 12), odd n in [1, 9] and odd k in
// [1, 5] for the 12x1 NEON FMA kernel with sparsity 0.0f.
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, m_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 13; cur_m < 24; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
      for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3502
// Runs the 12x1 NEON FMA kernel with output_stride 29 at m = 24 and
// sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .output_stride(29)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3519
// Runs the 12x1 NEON FMA kernel with qmin set to 128 at m = 24 and
// sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3536
// Runs the 12x1 NEON FMA kernel with qmax set to 128 at m = 24 and
// sparsity 0.0f, sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3553
// Runs the 12x1 NEON FMA kernel with sparsity 0.5f at m = 24,
// sweeping odd n in [1, 9] and odd k in [1, 5].
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n < 10; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(cur_n).k(cur_k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3569
// zero_weights for the 12x1 neonfma ukernel (mr=12, nr=1): m is fixed at 24,
// n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, and sparsity is set to 1.0.
TEST(F32_SPMM_MINMAX_12X1__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(1)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3585 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3586
3587
3588 #if XNN_ARCH_ARM64
// k_eq_1 for the 12x2 neonfma ukernel (mr=12, nr=2): a single run with
// m=12, n=2, k=1 at sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
      .mr(12).nr(2)
      .m(12).n(2).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
}
3600
// k_eq_1_subtile for the 12x2 neonfma ukernel (mr=12, nr=2): m=12 and k=1
// are fixed while n takes the values 1 and 2, at sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg <= 2; ++n_arg) {
    SpMMMicrokernelTester()
        .mr(12).nr(2)
        .m(12).n(n_arg).k(1)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
3614
// k_gt_1 for the 12x2 neonfma ukernel (mr=12, nr=2): m=12 and n=2 are fixed
// while k takes every value in 2..9, at sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    SpMMMicrokernelTester()
        .mr(12).nr(2)
        .m(12).n(2).k(k_arg)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
3628
// k_gt_1_subtile for the 12x2 neonfma ukernel (mr=12, nr=2): m=12 is fixed;
// for each k in 2..9 (outer loop) n takes the values 1 and 2, at sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    for (uint32_t n_arg = 1; n_arg <= 2; ++n_arg) {
      SpMMMicrokernelTester()
          .mr(12).nr(2)
          .m(12).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3644
// n_gt_2 for the 12x2 neonfma ukernel (mr=12, nr=2): m=12 is fixed, n takes
// every value in 3..9 and k is in {1, 3, 5}, at sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 3; n_arg < 10; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(2)
          .m(12).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3660
// n_div_2 for the 12x2 neonfma ukernel (mr=12, nr=2): m=12 is fixed, n is in
// {4, 6} and k is in {1, 3, 5}.
// NOTE(review): this test never calls .sparsity(), so whatever default the
// tester uses applies here - confirm that this is intended.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 4; n_arg <= 6; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(2)
          .m(12).n(n_arg).k(k_arg)
          .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3675
// m_lt_12 for the 12x2 neonfma ukernel (mr=12, nr=2): every m in 1..11 is
// combined with n in {1, 4, 7} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, m_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 1; m_arg < 12; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 3) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(2)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3693
// m_div_12 for the 12x2 neonfma ukernel (mr=12, nr=2): m in {24, 36} is
// combined with n in {1, 4, 7} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, m_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 24; m_arg <= 36; m_arg += 12) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 3) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(2)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3711
// m_gt_12 for the 12x2 neonfma ukernel (mr=12, nr=2): every m in 13..23 is
// combined with n in {1, 4, 7} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, m_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 13; m_arg < 24; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 3) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(2)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3729
// output_stride for the 12x2 neonfma ukernel (mr=12, nr=2): m is fixed at 24
// with output_stride(29); n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 3) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(2)
          .m(24).n(n_arg).k(k_arg)
          .output_stride(29)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3746
// qmin for the 12x2 neonfma ukernel (mr=12, nr=2): m is fixed at 24 and
// qmin(128) is applied; n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 3) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(2)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3763
// qmax for the 12x2 neonfma ukernel (mr=12, nr=2): m is fixed at 24 and
// qmax(128) is applied; n in {1, 4, 7}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 3) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(2)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3780
// half_sparse for the 12x2 neonfma ukernel (mr=12, nr=2): m is fixed at 24,
// n in {1, 4, 7}, k in {1, 3, 5}, and sparsity is set to 0.5.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 3) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(2)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3796
// zero_weights for the 12x2 neonfma ukernel (mr=12, nr=2): m is fixed at 24,
// n in {1, 4, 7}, k in {1, 3, 5}, and sparsity is set to 1.0.
TEST(F32_SPMM_MINMAX_12X2__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 3) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(2)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3812 #endif // XNN_ARCH_ARM64
3813
3814
3815 #if XNN_ARCH_ARM64
// k_eq_1 for the 12x4 neonfma ukernel (mr=12, nr=4): a single run with
// m=12, n=4, k=1 at sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
      .mr(12).nr(4)
      .m(12).n(4).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
}
3827
// k_eq_1_subtile for the 12x4 neonfma ukernel (mr=12, nr=4): m=12 and k=1
// are fixed while n takes every value in 1..4, at sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
    SpMMMicrokernelTester()
        .mr(12).nr(4)
        .m(12).n(n_arg).k(1)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
3841
// k_gt_1 for the 12x4 neonfma ukernel (mr=12, nr=4): m=12 and n=4 are fixed
// while k takes every value in 2..9, at sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    SpMMMicrokernelTester()
        .mr(12).nr(4)
        .m(12).n(4).k(k_arg)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
3855
// k_gt_1_subtile for the 12x4 neonfma ukernel (mr=12, nr=4): m=12 is fixed;
// for each k in 2..9 (outer loop) n takes every value in 1..4, at sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
      SpMMMicrokernelTester()
          .mr(12).nr(4)
          .m(12).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3871
// n_gt_4 for the 12x4 neonfma ukernel (mr=12, nr=4): m=12 is fixed, n takes
// every value in 5..9 and k is in {1, 3, 5}, at sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, n_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 5; n_arg < 10; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(4)
          .m(12).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3887
// n_div_4 for the 12x4 neonfma ukernel (mr=12, nr=4): m=12 is fixed, n is in
// {8, 12} and k is in {1, 3, 5}.
// NOTE(review): this test never calls .sparsity(), so whatever default the
// tester uses applies here - confirm that this is intended.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, n_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 8; n_arg <= 12; n_arg += 4) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(4)
          .m(12).n(n_arg).k(k_arg)
          .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3902
// m_lt_12 for the 12x4 neonfma ukernel (mr=12, nr=4): every m in 1..11 is
// combined with n in {1, 6, 11, 16} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, m_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 1; m_arg < 12; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 20; n_arg += 5) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(4)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3920
// m_div_12 for the 12x4 neonfma ukernel (mr=12, nr=4): m in {24, 36} is
// combined with n in {1, 6, 11, 16} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, m_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 24; m_arg <= 36; m_arg += 12) {
    for (uint32_t n_arg = 1; n_arg < 20; n_arg += 5) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(4)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3938
// m_gt_12 for the 12x4 neonfma ukernel (mr=12, nr=4): every m in 13..23 is
// combined with n in {1, 6, 11, 16} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, m_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 13; m_arg < 24; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 20; n_arg += 5) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(12).nr(4)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3956
// output_stride for the 12x4 neonfma ukernel (mr=12, nr=4): m is fixed at 24
// with output_stride(29); n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 20; n_arg += 5) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(4)
          .m(24).n(n_arg).k(k_arg)
          .output_stride(29)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3973
// qmin for the 12x4 neonfma ukernel (mr=12, nr=4): m is fixed at 24 and
// qmin(128) is applied; n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 20; n_arg += 5) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(4)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3990
// qmax for the 12x4 neonfma ukernel (mr=12, nr=4): m is fixed at 24 and
// qmax(128) is applied; n in {1, 6, 11, 16}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 20; n_arg += 5) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(4)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4007
// half_sparse for the 12x4 neonfma ukernel (mr=12, nr=4): m is fixed at 24,
// n in {1, 6, 11, 16}, k in {1, 3, 5}, and sparsity is set to 0.5.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 20; n_arg += 5) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(4)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4023
// zero_weights for the 12x4 neonfma ukernel (mr=12, nr=4): m is fixed at 24,
// n in {1, 6, 11, 16}, k in {1, 3, 5}, and sparsity is set to 1.0.
TEST(F32_SPMM_MINMAX_12X4__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 20; n_arg += 5) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(12).nr(4)
          .m(24).n(n_arg).k(k_arg)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4039 #endif // XNN_ARCH_ARM64
4040
4041
4042 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_1 for the 16x1 neon ukernel (mr=16, nr=1): a single run with
// m=16, n=1, k=1 at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
}
4054
// k_gt_1 for the 16x1 neon ukernel (mr=16, nr=1): m=16 and n=1 are fixed
// while k takes every value in 2..9, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(1).k(k_arg)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
  }
}
4068
// n_gt_1 for the 16x1 neon ukernel (mr=16, nr=1): m=16 is fixed, n takes
// every value in 2..9 and k is in {1, 3, 5}, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 2; n_arg < 10; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(16).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4084
// m_lt_16 for the 16x1 neon ukernel (mr=16, nr=1): every m in 1..15 is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, m_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 1; m_arg < 16; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4102
// m_div_16 for the 16x1 neon ukernel (mr=16, nr=1): m in {32, 48} is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, m_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 32; m_arg <= 48; m_arg += 16) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4120
// m_gt_16 for the 16x1 neon ukernel (mr=16, nr=1): every m in 17..31 is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, m_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 17; m_arg < 32; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4138
// output_stride for the 16x1 neon ukernel (mr=16, nr=1): m is fixed at 32
// with output_stride(37); n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .output_stride(37)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4155
// qmin for the 16x1 neon ukernel (mr=16, nr=1): m is fixed at 32 and
// qmin(128) is applied; n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4172
// qmax for the 16x1 neon ukernel (mr=16, nr=1): m is fixed at 32 and
// qmax(128) is applied; n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4189
// half_sparse for the 16x1 neon ukernel (mr=16, nr=1): m is fixed at 32,
// n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, and sparsity is set to 0.5.
TEST(F32_SPMM_MINMAX_16X1__NEON, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4205
// zero_weights for the 16x1 neon ukernel (mr=16, nr=1): m is fixed at 32,
// n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, and sparsity is set to 1.0.
TEST(F32_SPMM_MINMAX_16X1__NEON, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4221 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4222
4223
4224 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_1 for the 16x1 neon_pipelined ukernel (mr=16, nr=1): a single run
// with m=16, n=1, k=1 at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
}
4236
// k_gt_1 for the 16x1 neon_pipelined ukernel (mr=16, nr=1): m=16 and n=1 are
// fixed while k takes every value in 2..9, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(1).k(k_arg)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
  }
}
4250
// n_gt_1 for the 16x1 neon_pipelined ukernel (mr=16, nr=1): m=16 is fixed,
// n takes every value in 2..9 and k is in {1, 3, 5}, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 2; n_arg < 10; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(16).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4266
// m_lt_16 for the 16x1 neon_pipelined ukernel (mr=16, nr=1): every m in
// 1..15 is combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, m_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 1; m_arg < 16; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4284
// m_div_16 for the 16x1 neon_pipelined ukernel (mr=16, nr=1): m in {32, 48}
// is combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, m_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 32; m_arg <= 48; m_arg += 16) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4302
// m_gt_16 for the 16x1 neon_pipelined ukernel (mr=16, nr=1): every m in
// 17..31 is combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, m_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 17; m_arg < 32; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4320
// output_stride for the 16x1 neon_pipelined ukernel (mr=16, nr=1): m is fixed
// at 32 with output_stride(37); n in {1, 3, 5, 7, 9}, k in {1, 3, 5},
// sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .output_stride(37)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4337
// qmin for the 16x1 neon_pipelined ukernel (mr=16, nr=1): m is fixed at 32
// and qmin(128) is applied; n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4354
// qmax for the 16x1 neon_pipelined ukernel (mr=16, nr=1): m is fixed at 32
// and qmax(128) is applied; n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4371
// half_sparse for the 16x1 neon_pipelined ukernel (mr=16, nr=1): m is fixed
// at 32, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, and sparsity is set to 0.5.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4387
// zero_weights for the 16x1 neon_pipelined ukernel (mr=16, nr=1): m is fixed
// at 32, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, and sparsity is set to 1.0.
TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4403 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4404
4405
4406 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_2 for the 16x1 neon_x2 ukernel (mr=16, nr=1): a single run with
// m=16, n=1, k=2 at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
}
4418
// k_lt_2 for the 16x1 neon_x2 ukernel (mr=16, nr=1): m=16 and n=1 are fixed;
// the loop bounds admit only k=1, so the body runs once, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_arg = 1; k_arg < 2; ++k_arg) {
    SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(1).k(k_arg)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
4432
// k_gt_2 for the 16x1 neon_x2 ukernel (mr=16, nr=1): m=16 and n=1 are fixed;
// the loop bounds admit only k=3, so the body runs once, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_arg = 3; k_arg < 4; ++k_arg) {
    SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(1).k(k_arg)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
4446
// k_div_2 for the 16x1 neon_x2 ukernel (mr=16, nr=1): m=16 and n=1 are fixed
// while k takes the even values 4, 6, ..., 20, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_arg = 4; k_arg <= 20; k_arg += 2) {
    SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(1).k(k_arg)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
4460
// n_gt_1 for the 16x1 neon_x2 ukernel (mr=16, nr=1): m=16 is fixed, n takes
// every value in 2..9 and k is in {1, 4, 7, 10}, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 2; n_arg < 10; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(16).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4476
// m_lt_16 for the 16x1 neon_x2 ukernel (mr=16, nr=1): every m in 1..15 is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, m_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 1; m_arg < 16; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4494
// m_div_16 for the 16x1 neon_x2 ukernel (mr=16, nr=1): m in {32, 48} is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, m_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 32; m_arg <= 48; m_arg += 16) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4512
// m_gt_16 for the 16x1 neon_x2 ukernel (mr=16, nr=1): every m in 17..31 is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, m_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_arg = 17; m_arg < 32; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4530
// output_stride for the 16x1 neon_x2 ukernel (mr=16, nr=1): m is fixed at 32
// with output_stride(37); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10},
// sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .output_stride(37)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4547
// qmin for the 16x1 neon_x2 ukernel (mr=16, nr=1): m is fixed at 32 and
// qmin(128) is applied; n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4564
// qmax for the 16x1 neon_x2 ukernel (mr=16, nr=1): m is fixed at 32 and
// qmax(128) is applied; n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4581
// half_sparse for the 16x1 neon_x2 ukernel (mr=16, nr=1): m is fixed at 32,
// n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, and sparsity is set to 0.5.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4597
// zero_weights for the 16x1 neon_x2 ukernel (mr=16, nr=1): m is fixed at 32,
// n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}, and sparsity is set to 1.0.
TEST(F32_SPMM_MINMAX_16X1__NEON_X2, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 10; k_arg += 3) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4613 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4614
4615
4616 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_1 for the 16x1 neonfma ukernel (mr=16, nr=1): a single run with
// m=16, n=1, k=1 at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
}
4628
// k_gt_1 for the 16x1 neonfma ukernel (mr=16, nr=1): m=16 and n=1 are fixed
// while k takes every value in 2..9, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(1).k(k_arg)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
4642
// n_gt_1 for the 16x1 neonfma ukernel (mr=16, nr=1): m=16 is fixed, n takes
// every value in 2..9 and k is in {1, 3, 5}, at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 2; n_arg < 10; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(16).n(n_arg).k(k_arg)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4658
// m_lt_16 for the 16x1 neonfma ukernel (mr=16, nr=1): every m in 1..15 is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, m_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 1; m_arg < 16; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4676
// m_div_16 for the 16x1 neonfma ukernel (mr=16, nr=1): m in {32, 48} is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, m_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 32; m_arg <= 48; m_arg += 16) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4694
// m_gt_16 for the 16x1 neonfma ukernel (mr=16, nr=1): every m in 17..31 is
// combined with n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, all at sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, m_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_arg = 17; m_arg < 32; ++m_arg) {
    for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
      for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
        SpMMMicrokernelTester()
            .mr(16).nr(1)
            .m(m_arg).n(n_arg).k(k_arg)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4712
// output_stride for the 16x1 neonfma ukernel (mr=16, nr=1): m is fixed at 32
// with output_stride(37); n in {1, 3, 5, 7, 9}, k in {1, 3, 5}, sparsity 0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_arg = 1; n_arg < 10; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(32).n(n_arg).k(k_arg)
          .output_stride(37)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4729
// 16x1 NEON-FMA kernel with qmin(128) and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4746
// 16x1 NEON-FMA kernel with qmax(128) and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4763
// 16x1 NEON-FMA kernel with sparsity 0.5 and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4779
// 16x1 NEON-FMA kernel with sparsity 1.0 and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4795 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4796
4797
4798 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Pipelined 16x1 NEON-FMA kernel: single case m = 16, n = 1, k = 1, sparsity 0.0.
// Gated by TEST_REQUIRES_ARM_NEON_FMA.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(16).nr(1)
    .m(16).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
}
4810
// Pipelined 16x1 NEON-FMA kernel: every k in [2, 9] with m = 16, n = 1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(k_val)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
  }
}
4824
// Pipelined 16x1 NEON-FMA kernel: every n in [2, 9], k in {1,3,5}, m = 16, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4840
// Pipelined 16x1 NEON-FMA kernel: every m in [1, 15] (below mr = 16),
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, m_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4858
// Pipelined 16x1 NEON-FMA kernel: m in {32, 48} (multiples of mr = 16),
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, m_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 32; m_val <= 48; m_val += 16) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4876
// Pipelined 16x1 NEON-FMA kernel: every m in [17, 31] (above mr = 16),
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, m_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 17; m_val < 32; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4894
// Pipelined 16x1 NEON-FMA kernel with output_stride(37) and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4911
// Pipelined 16x1 NEON-FMA kernel with qmin(128) and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4928
// Pipelined 16x1 NEON-FMA kernel with qmax(128) and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4945
// Pipelined 16x1 NEON-FMA kernel with sparsity 0.5 and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4961
// Pipelined 16x1 NEON-FMA kernel with sparsity 1.0 and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4977 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4978
4979
4980 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 16x1 NEON-FMA x2 kernel: single case m = 16, n = 1, k = 2, sparsity 0.0.
// Gated by TEST_REQUIRES_ARM_NEON_FMA.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(16).nr(1)
    .m(16).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
}
4992
// 16x1 NEON-FMA x2 kernel: k below 2 with m = 16, n = 1, sparsity 0.0.
// The loop bounds [1, 2) yield exactly one iteration (k = 1).
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 2; ++k_val) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(k_val)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
5006
// 16x1 NEON-FMA x2 kernel: k above 2 with m = 16, n = 1, sparsity 0.0.
// The loop bounds [3, 4) yield exactly one iteration (k = 3).
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 3; k_val < 4; ++k_val) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(k_val)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
5020
// 16x1 NEON-FMA x2 kernel: even k from 4 through 20 with m = 16, n = 1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 4; k_val <= 20; k_val += 2) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(k_val)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
5034
// 16x1 NEON-FMA x2 kernel: every n in [2, 9], k in {1,4,7,10}, m = 16, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5050
// 16x1 NEON-FMA x2 kernel: every m in [1, 15] (below mr = 16),
// n in {1,3,5,7,9}, k in {1,4,7,10}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, m_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5068
// 16x1 NEON-FMA x2 kernel: m in {32, 48} (multiples of mr = 16),
// n in {1,3,5,7,9}, k in {1,4,7,10}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, m_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 32; m_val <= 48; m_val += 16) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5086
// 16x1 NEON-FMA x2 kernel: every m in [17, 31] (above mr = 16),
// n in {1,3,5,7,9}, k in {1,4,7,10}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, m_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 17; m_val < 32; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5104
// 16x1 NEON-FMA x2 kernel with output_stride(37) and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,4,7,10}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5121
// 16x1 NEON-FMA x2 kernel with qmin(128) and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,4,7,10}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5138
// 16x1 NEON-FMA x2 kernel with qmax(128) and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,4,7,10}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5155
// 16x1 NEON-FMA x2 kernel with sparsity 0.5 and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,4,7,10}.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5171
// 16x1 NEON-FMA x2 kernel with sparsity 1.0 and m fixed at 32;
// n in {1,3,5,7,9}, k in {1,4,7,10}.
TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5187 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5188
5189
5190 #if XNN_ARCH_ARM64
// 16x2 NEON-FMA kernel: single case m = 16, n = 2, k = 1, sparsity 0.0.
// Gated by TEST_REQUIRES_ARM_NEON_FMA.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(16).nr(2)
    .m(16).n(2).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
}
5202
// 16x2 NEON-FMA kernel: k = 1 with n in {1, 2} (up to nr = 2), m = 16, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    SpMMMicrokernelTester()
      .mr(16).nr(2)
      .m(16).n(n_val).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
5216
// 16x2 NEON-FMA kernel: every k in [2, 9] with m = 16, n = 2, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    SpMMMicrokernelTester()
      .mr(16).nr(2)
      .m(16).n(2).k(k_val)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
5230
// 16x2 NEON-FMA kernel: every k in [2, 9] (outer loop) with n in {1, 2} (inner loop),
// m = 16, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      SpMMMicrokernelTester()
        .mr(16).nr(2)
        .m(16).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5246
// 16x2 NEON-FMA kernel: every n in [3, 9] (above nr = 2), k in {1,3,5}, m = 16, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 3; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(2)
        .m(16).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5262
// 16x2 NEON-FMA kernel: n in {4, 6} (multiples of nr = 2), k in {1,3,5}, m = 16.
// Sparsity is pinned to 0.0 explicitly, matching the neighbouring k_*/n_*/m_* tests
// of this suite, rather than relying on whatever default SpMMMicrokernelTester uses.
// NOTE(review): this file is generated by tools/generate-spmm-test.py; mirror this
// change in the generator template so it survives regeneration.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(2)
        .m(16).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5277
// 16x2 NEON-FMA kernel: every m in [1, 15] (below mr = 16),
// n in {1,4,7}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, m_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 3) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(2)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5295
// 16x2 NEON-FMA kernel: m in {32, 48} (multiples of mr = 16),
// n in {1,4,7}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, m_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 32; m_val <= 48; m_val += 16) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 3) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(2)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5313
// 16x2 NEON-FMA kernel: every m in [17, 31] (above mr = 16),
// n in {1,4,7}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, m_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 17; m_val < 32; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 3) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(2)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5331
// 16x2 NEON-FMA kernel with output_stride(37) and m fixed at 32;
// n in {1,4,7}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 3) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(2)
        .m(32).n(n_val).k(k_val)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5348
// 16x2 NEON-FMA kernel with qmin(128) and m fixed at 32;
// n in {1,4,7}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 3) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(2)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5365
// 16x2 NEON-FMA kernel with qmax(128) and m fixed at 32;
// n in {1,4,7}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 3) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(2)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5382
// 16x2 NEON-FMA kernel with sparsity 0.5 and m fixed at 32;
// n in {1,4,7}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 3) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(2)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5398
// 16x2 NEON-FMA kernel with sparsity 1.0 and m fixed at 32;
// n in {1,4,7}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_16X2__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 10; n_val += 3) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(2)
        .m(32).n(n_val).k(k_val)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5414 #endif // XNN_ARCH_ARM64
5415
5416
5417 #if XNN_ARCH_ARM64
// 16x4 NEON-FMA kernel: single case m = 16, n = 4, k = 1, sparsity 0.0.
// Gated by TEST_REQUIRES_ARM_NEON_FMA.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(16).nr(4)
    .m(16).n(4).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
}
5429
// 16x4 NEON-FMA kernel: k = 1 with every n in [1, 4] (up to nr = 4), m = 16, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    SpMMMicrokernelTester()
      .mr(16).nr(4)
      .m(16).n(n_val).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
5443
// 16x4 NEON-FMA kernel: every k in [2, 9] with m = 16, n = 4, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    SpMMMicrokernelTester()
      .mr(16).nr(4)
      .m(16).n(4).k(k_val)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
5457
// 16x4 NEON-FMA kernel: every k in [2, 9] (outer loop) with every n in [1, 4] (inner loop),
// m = 16, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      SpMMMicrokernelTester()
        .mr(16).nr(4)
        .m(16).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5473
// 16x4 NEON-FMA kernel: every n in [5, 9] (above nr = 4), k in {1,3,5}, m = 16, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, n_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 5; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(4)
        .m(16).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5489
// 16x4 NEON-FMA kernel: n in {8, 12} (multiples of nr = 4), k in {1,3,5}, m = 16.
// Sparsity is pinned to 0.0 explicitly, matching the neighbouring k_*/n_*/m_* tests
// of this suite, rather than relying on whatever default SpMMMicrokernelTester uses.
// NOTE(review): this file is generated by tools/generate-spmm-test.py; mirror this
// change in the generator template so it survives regeneration.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, n_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(4)
        .m(16).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5504
// 16x4 NEON-FMA kernel: every m in [1, 15] (below mr = 16),
// n in {1,6,11,16}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, m_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 20; n_val += 5) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(4)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5522
// 16x4 NEON-FMA kernel: m in {32, 48} (multiples of mr = 16),
// n in {1,6,11,16}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, m_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 32; m_val <= 48; m_val += 16) {
    for (uint32_t n_val = 1; n_val < 20; n_val += 5) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(4)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5540
// 16x4 NEON-FMA kernel: every m in [17, 31] (above mr = 16),
// n in {1,6,11,16}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, m_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 17; m_val < 32; ++m_val) {
    for (uint32_t n_val = 1; n_val < 20; n_val += 5) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(4)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5558
// 16x4 NEON-FMA kernel with output_stride(37) and m fixed at 32;
// n in {1,6,11,16}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 20; n_val += 5) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(4)
        .m(32).n(n_val).k(k_val)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5575
// 16x4 NEON-FMA kernel with qmin(128) and m fixed at 32;
// n in {1,6,11,16}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 20; n_val += 5) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(4)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5592
// 16x4 NEON-FMA kernel with qmax(128) and m fixed at 32;
// n in {1,6,11,16}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 20; n_val += 5) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(4)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5609
// 16x4 NEON-FMA kernel with sparsity 0.5 and m fixed at 32;
// n in {1,6,11,16}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 20; n_val += 5) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(4)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5625
// 16x4 NEON-FMA kernel with sparsity 1.0 and m fixed at 32;
// n in {1,6,11,16}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_16X4__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val < 20; n_val += 5) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(4)
        .m(32).n(n_val).k(k_val)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5641 #endif // XNN_ARCH_ARM64
5642
5643
5644 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 32x1 NEON kernel: single case m = 32, n = 1, k = 1, sparsity 0.0.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(F32_SPMM_MINMAX_32X1__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
}
5656
// 32x1 NEON kernel: every k in [2, 9] with m = 32, n = 1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_val)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
  }
}
5670
// 32x1 NEON kernel: every n in [2, 9], k in {1,3,5}, m = 32, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5686
// 32x1 NEON kernel: every m in [1, 31] (below mr = 32),
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON, m_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val < 32; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5704
// 32x1 NEON kernel: m in {64, 96} (multiples of mr = 32),
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON, m_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 64; m_val <= 96; m_val += 32) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5722
// 32x1 NEON kernel: every m in [33, 63] (above mr = 32),
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON, m_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 33; m_val < 64; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5740
// 32x1 NEON kernel with output_stride(67) and m fixed at 64;
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_val).k(k_val)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5757
// 32x1 NEON kernel with qmin(128) and m fixed at 64;
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5774
// 32x1 NEON kernel with qmax(128) and m fixed at 64;
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_val).k(k_val)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5791
// 32x1 NEON kernel with sparsity 0.5 and m fixed at 64;
// n in {1,3,5,7,9}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_32X1__NEON, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_val).k(k_val)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5807
// 32x1 NEON kernel with sparsity 1.0 and m fixed at 64;
// n in {1,3,5,7,9}, k in {1,3,5}.
TEST(F32_SPMM_MINMAX_32X1__NEON, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_val).k(k_val)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5823 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5824
5825
5826 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Pipelined 32x1 NEON kernel: single case m = 32, n = 1, k = 1, sparsity 0.0.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
}
5838
// Pipelined 32x1 NEON kernel: every k in [2, 9] with m = 32, n = 1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_val)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
  }
}
5852
// Pipelined 32x1 NEON kernel: every n in [2, 9], k in {1,3,5}, m = 32, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_val).k(k_val)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5868
// Pipelined 32x1 NEON kernel: every m in [1, 31] (below mr = 32),
// n in {1,3,5,7,9}, k in {1,3,5}, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, m_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val < 32; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_val).n(n_val).k(k_val)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5886
// Uses m in {64, 96} (multiples of mr=32), n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, m_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 64; m_dim <= 96; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5904
// Sweeps m over 33..63 (above mr=32), n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, m_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 33; m_dim < 64; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5922
// Sets output_stride to 67 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5939
// Sets qmin to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5956
// Sets qmax to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5973
// Runs with sparsity 0.5f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5989
// Runs with sparsity 1.0f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6005 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6006
6007
6008 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One m=32, n=1 problem with k=2 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
}
6020
// Covers k below 2; the loop runs once, with k=1 (m=32, n=1, sparsity 0.0f).
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
6034
// Covers k just above 2; the loop runs once, with k=3 (m=32, n=1, sparsity 0.0f).
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
6048
// Sweeps even k from 4 through 20 with m=32, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
  }
}
6062
// Sweeps n over 2..9 and k over {1, 4, 7, 10} with m=32 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, n_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 2; n_dim < 10; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6078
// Sweeps m over 1..31 (below mr=32), n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, m_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim < 32; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6096
// Uses m in {64, 96} (multiples of mr=32), n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, m_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 64; m_dim <= 96; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6114
// Sweeps m over 33..63 (above mr=32), n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, m_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 33; m_dim < 64; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6132
// Sets output_stride to 67 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6149
// Sets qmin to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6166
// Sets qmax to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6183
// Runs with sparsity 0.5f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, half_sparse) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6199
// Runs with sparsity 1.0f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__NEON_X2, zero_weights) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6215 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6216
6217
6218 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One m=32, n=1 problem with k=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
}
6230
// Sweeps k over 2..9 with m=32, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
6244
// Sweeps n over 2..9 and k over {1, 3, 5} with m=32 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 2; n_dim < 10; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6260
// Sweeps m over 1..31 (below mr=32), n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, m_lt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim < 32; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6278
// Uses m in {64, 96} (multiples of mr=32), n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, m_div_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 64; m_dim <= 96; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6296
// Sweeps m over 33..63 (above mr=32), n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, m_gt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 33; m_dim < 64; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6314
// Sets output_stride to 67 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6331
// Sets qmin to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6348
// Sets qmax to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6365
// Runs with sparsity 0.5f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6381
// Runs with sparsity 1.0f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6397 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6398
6399
6400 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One m=32, n=1 problem with k=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
}
6412
// Sweeps k over 2..9 with m=32, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
  }
}
6426
// Sweeps n over 2..9 and k over {1, 3, 5} with m=32 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 2; n_dim < 10; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6442
// Sweeps m over 1..31 (below mr=32), n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, m_lt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim < 32; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6460
// Uses m in {64, 96} (multiples of mr=32), n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, m_div_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 64; m_dim <= 96; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6478
// Sweeps m over 33..63 (above mr=32), n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, m_gt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 33; m_dim < 64; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6496
// Sets output_stride to 67 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6513
// Sets qmin to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6530
// Sets qmax to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6547
// Runs with sparsity 0.5f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6563
// Runs with sparsity 1.0f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6579 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6580
6581
6582 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One m=32, n=1 problem with k=2 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
}
6594
// Covers k below 2; the loop runs once, with k=1 (m=32, n=1, sparsity 0.0f).
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
6608
// Covers k just above 2; the loop runs once, with k=3 (m=32, n=1, sparsity 0.0f).
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
6622
// Sweeps even k from 4 through 20 with m=32, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
  }
}
6636
// Sweeps n over 2..9 and k over {1, 4, 7, 10} with m=32 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, n_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 2; n_dim < 10; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6652
// Sweeps m over 1..31 (below mr=32), n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, m_lt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim < 32; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6670
// Uses m in {64, 96} (multiples of mr=32), n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, m_div_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 64; m_dim <= 96; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6688
// Sweeps m over 33..63 (above mr=32), n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, m_gt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 33; m_dim < 64; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
      for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6706
// Sets output_stride to 67 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6723
// Sets qmin to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6740
// Sets qmax to 128 with m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6757
// Runs with sparsity 0.5f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6773
// Runs with sparsity 1.0f and m=64; n over {1, 3, 5, 7, 9}, k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6789 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6790
6791
6792 #if XNN_ARCH_ARM64
// One m=32, n=2 problem with k=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(32).nr(2)
    .m(32).n(2).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
}
6804
// Sweeps n over 1..2 (up to nr=2) with m=32, k=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(2)
      .m(32).n(n_dim).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
6818
// Sweeps k over 2..9 with m=32, n=2 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(2)
      .m(32).n(2).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
6832
// Sweeps k over 2..9 and, for each k, n over 1..2 (up to nr=2); m=32, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      SpMMMicrokernelTester()
        .mr(32).nr(2)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6848
// Sweeps n over 3..9 (above nr=2) and k over {1, 3, 5} with m=32 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 3; n_dim < 10; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(2)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6864
// Uses n in {4, 6} (multiples of nr=2) and k over {1, 3, 5} with m=32.
// NOTE(review): unlike the sibling tests, no .sparsity(...) is set here, so the
// tester's own default applies -- confirm this is intended.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(2)
        .m(32).n(n_dim).k(k_dim)
        .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6879
// Sweeps m over 1..31 (below mr=32), n over {1, 4, 7}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, m_lt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim < 32; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 3) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6897
// Uses m in {64, 96} (multiples of mr=32), n over {1, 4, 7}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, m_div_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 64; m_dim <= 96; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 3) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6915
// Sweeps m over 33..63 (above mr=32), n over {1, 4, 7}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, m_gt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 33; m_dim < 64; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim < 10; n_dim += 3) {
      for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6933
// Sets output_stride to 67 with m=64; n over {1, 4, 7}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(2)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6950
// Sets qmin to 128 with m=64; n over {1, 4, 7}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(2)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6967
// Sets qmax to 128 with m=64; n over {1, 4, 7}, k over {1, 3, 5}; sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(2)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6984
// Runs with sparsity 0.5f and m=64; n over {1, 4, 7}, k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(2)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7000
// Runs with sparsity 1.0f and m=64; n over {1, 4, 7}, k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X2__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim < 10; n_dim += 3) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(2)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7016 #endif // XNN_ARCH_ARM64
7017
7018
7019 #if XNN_ARCH_ARM64
// One m=32, n=4 problem with k=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  SpMMMicrokernelTester()
    .mr(32).nr(4)
    .m(32).n(4).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
}
7031
// Sweeps n over 1..4 (up to nr=4) with m=32, k=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(4)
      .m(32).n(n_dim).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
7045
// k_gt_1: sweeps k = 2..9 with m = mr and n = nr at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(32).nr(4)
      .m(32).n(4).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
7059
// k_gt_1_subtile: crosses k = 2..9 with n = 1..4 (every n up to nr = 4);
// m = mr, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      SpMMMicrokernelTester()
        .mr(32).nr(4)
        .m(32).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7075
// n_gt_4: n = 5..9 (all above nr = 4) crossed with k = 1,3,5; m = mr,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, n_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 5; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(4)
        .m(32).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7091
// n_div_4: n = 8 and 12 (multiples of nr = 4) crossed with k = 1,3,5; m = mr.
// NOTE(review): unlike the neighbouring tests, no .sparsity(...) is set here,
// so whatever default SpMMMicrokernelTester uses applies -- confirm intended.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, n_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(4)
        .m(32).n(n).k(k)
        .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7106
// m_lt_32: every m in 1..31 (below mr = 32) crossed with n = 1,6,11,16 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, m_lt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m < 32; ++m) {
    for (uint32_t n = 1; n < 20; n += 5) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(4)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7124
// m_div_32: m = 64 and 96 (multiples of mr = 32) crossed with n = 1,6,11,16
// and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, m_div_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 64; m <= 96; m += 32) {
    for (uint32_t n = 1; n < 20; n += 5) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(4)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7142
// m_gt_32: every m in 33..63 (strictly between mr and 2 * mr) crossed with
// n = 1,6,11,16 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, m_gt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 33; m < 64; ++m) {
    for (uint32_t n = 1; n < 20; n += 5) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(4)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7160
// output_stride: m = 64 with output_stride(67), i.e. a stride larger than m;
// n = 1,6,11,16 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, output_stride) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n < 20; n += 5) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(4)
        .m(64).n(n).k(k)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7177
// qmin: runs with qmax left untouched and qmin(128) for m = 64,
// n = 1,6,11,16 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n < 20; n += 5) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(4)
        .m(64).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7194
// qmax: runs with qmin left untouched and qmax(128) for m = 64,
// n = 1,6,11,16 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n < 20; n += 5) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(4)
        .m(64).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7211
// half_sparse: m = 64, n = 1,6,11,16, k = 1,3,5 with sparsity set to 0.5.
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, half_sparse) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n < 20; n += 5) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(4)
        .m(64).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7227
// zero_weights: m = 64, n = 1,6,11,16, k = 1,3,5 with sparsity set to 1.0
// (presumably leaving no non-zero weights -- confirm against the tester).
TEST(F32_SPMM_MINMAX_32X4__NEONFMA, zero_weights) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n < 20; n += 5) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(4)
        .m(64).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7243 #endif // XNN_ARCH_ARM64
7244
7245
7246 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_1: single run of the 4x1 SSE kernel with m = mr, n = nr, k = 1 and
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  SpMMMicrokernelTester()
    .mr(4).nr(1)
    .m(4).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
}
7258
// k_gt_1: sweeps k = 2..9 with m = mr and n = nr at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
  }
}
7272
// n_gt_1: n = 2..9 (all above nr = 1) crossed with k = 1,3,5; m = mr,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, n_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7288
// m_lt_4: every m in 1..3 (below mr = 4) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, m_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 1; m < 4; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7306
// m_div_4: m = 8 and 12 (multiples of mr = 4) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, m_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 8; m <= 12; m += 4) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7324
// m_gt_4: every m in 5..7 (strictly between mr and 2 * mr) crossed with
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, m_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 5; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7342
// output_stride: m = 8 with output_stride(11), i.e. a stride larger than m;
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, output_stride) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .output_stride(11)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7359
// qmin: runs with qmin(128) for m = 8 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7376
// qmax: runs with qmax(128) for m = 8 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7393
// half_sparse: m = 8, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 0.5.
TEST(F32_SPMM_MINMAX_4X1__SSE, half_sparse) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7409
// zero_weights: m = 8, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 1.0
// (presumably leaving no non-zero weights -- confirm against the tester).
TEST(F32_SPMM_MINMAX_4X1__SSE, zero_weights) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7425 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7426
7427
7428 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_1: single run of the 8x1 SSE kernel with m = mr, n = nr, k = 1 and
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  SpMMMicrokernelTester()
    .mr(8).nr(1)
    .m(8).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
}
7440
// k_gt_1: sweeps k = 2..9 with m = mr and n = nr at sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
  }
}
7454
// n_gt_1: n = 2..9 (all above nr = 1) crossed with k = 1,3,5; m = mr,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, n_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7470
// m_lt_8: every m in 1..7 (below mr = 8) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, m_lt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 1; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7488
// m_div_8: m = 16 and 24 (multiples of mr = 8) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, m_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 16; m <= 24; m += 8) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7506
// m_gt_8: every m in 9..15 (strictly between mr and 2 * mr) crossed with
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, m_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 9; m < 16; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7524
// output_stride: m = 16 with output_stride(19), i.e. a stride larger than m;
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, output_stride) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .output_stride(19)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7541
// qmin: runs with qmin(128) for m = 16 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7558
// qmax: runs with qmax(128) for m = 16 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7575
// half_sparse: m = 16, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 0.5.
TEST(F32_SPMM_MINMAX_8X1__SSE, half_sparse) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7591
// zero_weights: m = 16, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 1.0
// (presumably leaving no non-zero weights -- confirm against the tester).
TEST(F32_SPMM_MINMAX_8X1__SSE, zero_weights) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7607 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7608
7609
7610 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_1: single run of the 16x1 SSE kernel with m = mr, n = nr, k = 1 and
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  SpMMMicrokernelTester()
    .mr(16).nr(1)
    .m(16).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
}
7622
// k_gt_1: sweeps k = 2..9 with m = mr and n = nr at sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
  }
}
7636
// n_gt_1: n = 2..9 (all above nr = 1) crossed with k = 1,3,5; m = mr,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, n_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7652
// m_lt_16: every m in 1..15 (below mr = 16) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, m_lt_16) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 1; m < 16; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7670
// m_div_16: m = 32 and 48 (multiples of mr = 16) crossed with n = 1,3,5,7,9
// and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, m_div_16) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 32; m <= 48; m += 16) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7688
// m_gt_16: every m in 17..31 (strictly between mr and 2 * mr) crossed with
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, m_gt_16) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 17; m < 32; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7706
// output_stride: m = 32 with output_stride(37), i.e. a stride larger than m;
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, output_stride) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n).k(k)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7723
// qmin: runs with qmin(128) for m = 32 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7740
// qmax: runs with qmax(128) for m = 32 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_16X1__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7757
// half_sparse: m = 32, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 0.5.
TEST(F32_SPMM_MINMAX_16X1__SSE, half_sparse) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7773
// zero_weights: m = 32, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 1.0
// (presumably leaving no non-zero weights -- confirm against the tester).
TEST(F32_SPMM_MINMAX_16X1__SSE, zero_weights) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7789 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7790
7791
7792 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_1: single run of the 32x1 SSE kernel with m = mr, n = nr, k = 1 and
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
}
7804
// k_gt_1: sweeps k = 2..9 with m = mr and n = nr at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
  }
}
7818
// n_gt_1: n = 2..9 (all above nr = 1) crossed with k = 1,3,5; m = mr,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, n_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7834
// m_lt_32: every m in 1..31 (below mr = 32) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, m_lt_32) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 1; m < 32; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7852
// m_div_32: m = 64 and 96 (multiples of mr = 32) crossed with n = 1,3,5,7,9
// and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, m_div_32) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 64; m <= 96; m += 32) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7870
// m_gt_32: every m in 33..63 (strictly between mr and 2 * mr) crossed with
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, m_gt_32) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 33; m < 64; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
7888
// output_stride: m = 64 with output_stride(67), i.e. a stride larger than m;
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, output_stride) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n).k(k)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7905
// qmin: runs with qmin(128) for m = 64 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7922
// qmax: runs with qmax(128) for m = 64 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7939
// half_sparse: m = 64, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 0.5.
TEST(F32_SPMM_MINMAX_32X1__SSE, half_sparse) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7955
// zero_weights: m = 64, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 1.0
// (presumably leaving no non-zero weights -- confirm against the tester).
TEST(F32_SPMM_MINMAX_32X1__SSE, zero_weights) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
7971 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7972
7973
7974 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// k_eq_1: single run of the 4x1 wasmsimd_arm kernel with m = mr, n = nr,
// k = 1 and sparsity 0.0. No ISA-check macro is used in this group.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(4).nr(1)
    .m(4).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
7985
// k_gt_1: sweeps k = 2..9 with m = mr and n = nr at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
7998
// n_gt_1: n = 2..9 (all above nr = 1) crossed with k = 1,3,5; m = mr,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, n_gt_1) {
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8013
// m_lt_4: every m in 1..3 (below mr = 4) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, m_lt_4) {
  for (uint32_t m = 1; m < 4; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8030
// m_div_4: m = 8 and 12 (multiples of mr = 4) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, m_div_4) {
  for (uint32_t m = 8; m <= 12; m += 4) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8047
// m_gt_4: every m in 5..7 (strictly between mr and 2 * mr) crossed with
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, m_gt_4) {
  for (uint32_t m = 5; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8064
// output_stride: m = 8 with output_stride(11), i.e. a stride larger than m;
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, output_stride) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .output_stride(11)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8080
// qmin: runs with qmin(128) for m = 8 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, qmin) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8096
// qmax: runs with qmax(128) for m = 8 (2 * mr), n = 1,3,5,7,9 and k = 1,3,5
// at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, qmax) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8112
// half_sparse: m = 8, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 0.5.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, half_sparse) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8127
// zero_weights: m = 8, n = 1,3,5,7,9, k = 1,3,5 with sparsity set to 1.0
// (presumably leaving no non-zero weights -- confirm against the tester).
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, zero_weights) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8142 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
8143
8144
8145 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// k_eq_1: single run of the 4x1 wasmsimd_arm_pipelined kernel with m = mr,
// n = nr, k = 1 and sparsity 0.0. No ISA-check macro is used in this group.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(4).nr(1)
    .m(4).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
}
8156
// k_gt_1: sweeps k = 2..9 with m = mr and n = nr at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
  }
}
8169
// n_gt_1: n = 2..9 (all above nr = 1) crossed with k = 1,3,5; m = mr,
// sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, n_gt_1) {
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8184
// m_lt_4: every m in 1..3 (below mr = 4) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, m_lt_4) {
  for (uint32_t m = 1; m < 4; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8201
// m_div_4: m = 8 and 12 (multiples of mr = 4) crossed with n = 1,3,5,7,9 and
// k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, m_div_4) {
  for (uint32_t m = 8; m <= 12; m += 4) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8218
// m_gt_4: every m in 5..7 (strictly between mr and 2 * mr) crossed with
// n = 1,3,5,7,9 and k = 1,3,5 at sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, m_gt_4) {
  for (uint32_t m = 5; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8235
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// output_stride set to 11 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8251
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// sparsity 0 and qmin set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8267
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// sparsity 0 and qmax set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8283
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8 and sparsity 0.5.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8298
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8 and sparsity 1.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8313 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
8314
8315
8316 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 1, k = 2 with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, k_eq_2) {
  SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
            xnn_init_f32_minmax_wasmsimd_params);
}
8327
// Runs every k below 2 (i.e. only k = 1) with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, k_lt_2) {
  for (size_t k_dim = 1; k_dim <= 1; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8340
// Runs k from 3 up to (but excluding) 4, i.e. only k = 3, with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, k_gt_2) {
  for (size_t k_dim = 3; k_dim <= 3; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8353
// Runs even k from 4 through 20 with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, k_div_2) {
  for (size_t k_dim = 4; k_dim < 22; k_dim += 2) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8366
// Sweeps n over 2..9 and k over {1, 4, 7, 10} with m = 4 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(4).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8381
// Sweeps m over 1..3, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, m_lt_4) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8398
// Sweeps m over {8, 12}, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, m_div_4) {
  for (uint32_t m_dim = 8; m_dim < 16; m_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8415
// Sweeps m over 5..7, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, m_gt_4) {
  for (uint32_t m_dim = 5; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8432
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8,
// output_stride set to 11 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8448
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8,
// sparsity 0 and qmin set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8464
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8,
// sparsity 0 and qmax set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8480
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8 and sparsity 0.5.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8495
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8 and sparsity 1.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8510 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
8511
8512
8513 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 1, k = 2 with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, k_eq_2) {
  SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
            xnn_init_f32_minmax_wasmsimd_params);
}
8524
// Runs every k below 2 (i.e. only k = 1) with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, k_lt_2) {
  for (size_t k_dim = 1; k_dim <= 1; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8537
// Runs k from 3 up to (but excluding) 4, i.e. only k = 3, with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, k_gt_2) {
  for (size_t k_dim = 3; k_dim <= 3; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8550
// Runs even k from 4 through 20 with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, k_div_2) {
  for (size_t k_dim = 4; k_dim < 22; k_dim += 2) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8563
// Sweeps n over 2..9 and k over {1, 4, 7, 10} with m = 4 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(4).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8578
// Sweeps m over 1..3, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, m_lt_4) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8595
// Sweeps m over {8, 12}, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, m_div_4) {
  for (uint32_t m_dim = 8; m_dim < 16; m_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8612
// Sweeps m over 5..7, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, m_gt_4) {
  for (uint32_t m_dim = 5; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8629
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8,
// output_stride set to 11 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8645
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8,
// sparsity 0 and qmin set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8661
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8,
// sparsity 0 and qmax set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8677
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8 and sparsity 0.5.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8692
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with m = 8 and sparsity 1.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8707 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
8708
8709
8710 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 1, k = 4 with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, k_eq_4) {
  SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(4)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
            xnn_init_f32_minmax_wasmsimd_params);
}
8721
// Runs k over 1..3 with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, k_lt_4) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8734
// Runs k over 5..7 with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, k_gt_4) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8747
// Runs k over the multiples of 4 from 8 through 40 with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, k_div_4) {
  for (size_t k_dim = 8; k_dim < 44; k_dim += 4) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8760
// Sweeps n over 2..9 and k over {1, 6, 11, 16} with m = 4 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(4).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8775
// Sweeps m over 1..3, n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, m_lt_4) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8792
// Sweeps m over {8, 12}, n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, m_div_4) {
  for (uint32_t m_dim = 8; m_dim < 16; m_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8809
// Sweeps m over 5..7, n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, m_gt_4) {
  for (uint32_t m_dim = 5; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8826
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with m = 8,
// output_stride set to 11 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8842
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with m = 8,
// sparsity 0 and qmin set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8858
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with m = 8,
// sparsity 0 and qmax set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8874
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with m = 8 and sparsity 0.5.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8889
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with m = 8 and sparsity 1.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8904 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
8905
8906
8907 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 1, k = 1 with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
8918
// Runs k over 2..9 with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
8931
// Sweeps n over 2..9 and k over {1, 3, 5} with m = 4 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(4).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
8946
// Sweeps m over 1..3, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, m_lt_4) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8963
// Sweeps m over {8, 12}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, m_div_4) {
  for (uint32_t m_dim = 8; m_dim < 16; m_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8980
// Sweeps m over 5..7, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, m_gt_4) {
  for (uint32_t m_dim = 5; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
8997
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// output_stride set to 11 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9013
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// sparsity 0 and qmin set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9029
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// sparsity 0 and qmax set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9045
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8 and sparsity 0.5.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9060
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8 and sparsity 1.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9075 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9076
9077
9078 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 1, k = 1 with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
            xnn_init_f32_minmax_wasmsimd_params);
}
9089
// Runs k over 2..9 with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
9102
// Sweeps n over 2..9 and k over {1, 3, 5} with m = 4 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(4).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9117
// Sweeps m over 1..3, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, m_lt_4) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9134
// Sweeps m over {8, 12}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, m_div_4) {
  for (uint32_t m_dim = 8; m_dim < 16; m_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9151
// Sweeps m over 5..7, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, m_gt_4) {
  for (uint32_t m_dim = 5; m_dim <= 7; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9168
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// output_stride set to 11 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9184
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// sparsity 0 and qmin set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9200
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8,
// sparsity 0 and qmax set to 128.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9216
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8 and sparsity 0.5.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9231
// Sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with m = 8 and sparsity 1.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n_dim).k(k_dim)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9246 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9247
9248
9249 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 1, k = 2 with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, k_eq_2) {
  SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2,
            xnn_init_f32_minmax_wasmsimd_params);
}
9260
// Runs every k below 2 (i.e. only k = 1) with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, k_lt_2) {
  for (size_t k_dim = 1; k_dim <= 1; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
9273
// Runs k from 3 up to (but excluding) 4, i.e. only k = 3, with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, k_gt_2) {
  for (size_t k_dim = 3; k_dim <= 3; ++k_dim) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
9286
// Runs even k from 4 through 20 with m = 4, n = 1 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, k_div_2) {
  for (size_t k_dim = 4; k_dim < 22; k_dim += 2) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
9299
// Sweeps n over 2..9 and k over {1, 4, 7, 10} with m = 4 and sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(4).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9314
// Sweeps m over 1..3, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, m_lt_4) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9331
// sparsity 0.0; m takes 8 and 12, n runs over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, m_div_4) {
  for (uint32_t m = 8; m <= 12; m += 4) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9348
// sparsity 0.0; m runs over 5..7, n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, m_gt_4) {
  for (uint32_t m = 5; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9365
// m=8 with output_stride set to 11, sparsity 0.0; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, output_stride) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .output_stride(11)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9381
// m=8, sparsity 0.0, qmin set to 128; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, qmin) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9397
// m=8, sparsity 0.0, qmax set to 128; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, qmax) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9413
// m=8 with sparsity 0.5; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, half_sparse) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9428
// m=8 with sparsity 1.0; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, zero_weights) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9443 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9444
9445
9446 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=4, n=1, k=2, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, k_eq_2) {
  SpMMMicrokernelTester()
    .mr(4).nr(1)
    .m(4).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
}
9457
// m=4, n=1, sparsity 0.0; the loop bounds admit only k = 1.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, k_lt_2) {
  for (size_t k = 1; k < 2; ++k) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
9470
// m=4, n=1, sparsity 0.0; the loop bounds admit only k = 3.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, k_gt_2) {
  for (size_t k = 3; k < 4; ++k) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
9483
// m=4, n=1, sparsity 0.0; k takes the even values 4, 6, ..., 20.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, k_div_2) {
  for (size_t k = 4; k <= 20; k += 2) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
9496
// m=4, sparsity 0.0; n runs over 2..9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, n_gt_1) {
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9511
// sparsity 0.0; m runs over 1..3, n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, m_lt_4) {
  for (uint32_t m = 1; m < 4; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9528
// sparsity 0.0; m takes 8 and 12, n runs over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, m_div_4) {
  for (uint32_t m = 8; m <= 12; m += 4) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9545
// sparsity 0.0; m runs over 5..7, n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, m_gt_4) {
  for (uint32_t m = 5; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9562
// m=8 with output_stride set to 11, sparsity 0.0; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, output_stride) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .output_stride(11)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9578
// m=8, sparsity 0.0, qmin set to 128; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, qmin) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9594
// m=8, sparsity 0.0, qmax set to 128; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, qmax) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9610
// m=8 with sparsity 0.5; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, half_sparse) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9625
// m=8 with sparsity 1.0; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, zero_weights) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9640 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9641
9642
9643 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=4, n=1, k=4, sparsity 0.0.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, k_eq_4) {
  SpMMMicrokernelTester()
    .mr(4).nr(1)
    .m(4).n(1).k(4)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
}
9654
// m=4, n=1, sparsity 0.0; k runs over 1..3.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, k_lt_4) {
  for (size_t k = 1; k < 4; ++k) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
9667
// m=4, n=1, sparsity 0.0; k runs over 5..7.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, k_gt_4) {
  for (size_t k = 5; k < 8; ++k) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
9680
// m=4, n=1, sparsity 0.0; k takes the multiples of 4 from 8 through 40.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
9693
// m=4, sparsity 0.0; n runs over 2..9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, n_gt_1) {
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9708
// sparsity 0.0; m runs over 1..3, n over 1, 3, ..., 9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, m_lt_4) {
  for (uint32_t m = 1; m < 4; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 20; k += 5) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9725
// sparsity 0.0; m takes 8 and 12, n runs over 1, 3, ..., 9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, m_div_4) {
  for (uint32_t m = 8; m <= 12; m += 4) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 20; k += 5) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9742
// sparsity 0.0; m runs over 5..7, n over 1, 3, ..., 9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, m_gt_4) {
  for (uint32_t m = 5; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 20; k += 5) {
        SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9759
// m=8 with output_stride set to 11, sparsity 0.0; n over 1, 3, ..., 9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, output_stride) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .output_stride(11)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9775
// m=8, sparsity 0.0, qmin set to 128; n over 1, 3, ..., 9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, qmin) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9791
// m=8, sparsity 0.0, qmax set to 128; n over 1, 3, ..., 9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, qmax) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9807
// m=8 with sparsity 0.5; n over 1, 3, ..., 9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, half_sparse) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9822
// m=8 with sparsity 1.0; n over 1, 3, ..., 9 and k over 1, 6, 11, 16.
TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, zero_weights) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(8).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9837 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9838
9839
9840 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=8, n=1, k=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(8).nr(1)
    .m(8).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
9851
// m=8, n=1, sparsity 0.0; k runs over 2..9.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
9864
// m=8, sparsity 0.0; n runs over 2..9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, n_gt_1) {
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9879
// sparsity 0.0; m runs over 1..7, n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, m_lt_8) {
  for (uint32_t m = 1; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9896
// sparsity 0.0; m takes 16 and 24, n runs over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, m_div_8) {
  for (uint32_t m = 16; m <= 24; m += 8) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9913
// sparsity 0.0; m runs over 9..15, n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, m_gt_8) {
  for (uint32_t m = 9; m < 16; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
9930
// m=16 with output_stride set to 19, sparsity 0.0; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, output_stride) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .output_stride(19)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9946
// m=16, sparsity 0.0, qmin set to 128; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, qmin) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9962
// m=16, sparsity 0.0, qmax set to 128; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, qmax) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9978
// m=16 with sparsity 0.5; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, half_sparse) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
9993
// m=16 with sparsity 1.0; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, zero_weights) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10008 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10009
10010
10011 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=8, n=1, k=1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(8).nr(1)
    .m(8).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
}
10022
// m=8, n=1, sparsity 0.0; k runs over 2..9.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10035
// m=8, sparsity 0.0; n runs over 2..9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, n_gt_1) {
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10050
// sparsity 0.0; m runs over 1..7, n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, m_lt_8) {
  for (uint32_t m = 1; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10067
// sparsity 0.0; m takes 16 and 24, n runs over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, m_div_8) {
  for (uint32_t m = 16; m <= 24; m += 8) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10084
// sparsity 0.0; m runs over 9..15, n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, m_gt_8) {
  for (uint32_t m = 9; m < 16; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10101
// m=16 with output_stride set to 19, sparsity 0.0; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, output_stride) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .output_stride(19)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10117
// m=16, sparsity 0.0, qmin set to 128; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, qmin) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10133
// m=16, sparsity 0.0, qmax set to 128; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, qmax) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10149
// m=16 with sparsity 0.5; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, half_sparse) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10164
// m=16 with sparsity 1.0; n over 1, 3, ..., 9 and k over 1, 3, 5.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, zero_weights) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10179 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10180
10181
10182 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=8, n=1, k=2, sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, k_eq_2) {
  SpMMMicrokernelTester()
    .mr(8).nr(1)
    .m(8).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
}
10193
// m=8, n=1, sparsity 0.0; the loop bounds admit only k = 1.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, k_lt_2) {
  for (size_t k = 1; k < 2; ++k) {
    SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10206
// m=8, n=1, sparsity 0.0; the loop bounds admit only k = 3.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, k_gt_2) {
  for (size_t k = 3; k < 4; ++k) {
    SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10219
// m=8, n=1, sparsity 0.0; k takes the even values 4, 6, ..., 20.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, k_div_2) {
  for (size_t k = 4; k <= 20; k += 2) {
    SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10232
// m=8, sparsity 0.0; n runs over 2..9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, n_gt_1) {
  for (uint32_t n = 2; n < 10; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(n).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10247
// sparsity 0.0; m runs over 1..7, n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, m_lt_8) {
  for (uint32_t m = 1; m < 8; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10264
// sparsity 0.0; m takes 16 and 24, n runs over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, m_div_8) {
  for (uint32_t m = 16; m <= 24; m += 8) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10281
// sparsity 0.0; m runs over 9..15, n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, m_gt_8) {
  for (uint32_t m = 9; m < 16; ++m) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(m).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10298
// m=16 with output_stride set to 19, sparsity 0.0; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, output_stride) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .output_stride(19)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10314
// m=16, sparsity 0.0, qmin set to 128; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, qmin) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10330
// m=16, sparsity 0.0, qmax set to 128; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, qmax) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10346
// m=16 with sparsity 0.5; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, half_sparse) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10361
// m=16 with sparsity 1.0; n over 1, 3, ..., 9 and k over 1, 4, 7, 10.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, zero_weights) {
  for (uint32_t n = 1; n < 10; n += 2) {
    for (size_t k = 1; k <= 10; k += 3) {
      SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(16).n(n).k(k)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10376 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10377
10378
10379 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=8, n=1, k=2, sparsity 0.0.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, k_eq_2) {
  SpMMMicrokernelTester()
    .mr(8).nr(1)
    .m(8).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
}
10390
// m=8, n=1, sparsity 0.0; the loop bounds admit only k = 1.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, k_lt_2) {
  for (size_t k = 1; k < 2; ++k) {
    SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(k)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10403
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, k_gt_2) {
  // The loop bounds admit only k = 3; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 3; k_val < 4; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10416
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, k_div_2) {
  // Even k from 4 through 20 inclusive; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 4; k_val <= 20; k_val += 2) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10429
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, n_gt_1) {
  // n in 2..9, k in {1, 4, 7, 10}; m = 8, sparsity 0.0.
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(8).n(n_val).k(k_val).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10444
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, m_lt_8) {
  // m in 1..7, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 1; m_val < 8; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10461
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, m_div_8) {
  // m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 16; m_val <= 24; m_val += 8) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10478
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, m_gt_8) {
  // m in 9..15, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 9; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10495
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, output_stride) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, output_stride = 19, sparsity 0.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).output_stride(19).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10511
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, qmin) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 0.0, qmin = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10527
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, qmax) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 0.0, qmax = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10543
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, half_sparse) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 0.5.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10558
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, zero_weights) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 1.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10573 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10574
10575
10576 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, k_eq_4) {
  // Single invocation: m = 8, n = 1, k = 4, sparsity 0.0.
  SpMMMicrokernelTester tester{};
  tester.mr(8).nr(1).m(8).n(1).k(4).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
}
10587
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, k_lt_4) {
  // k in {1, 2, 3}; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10600
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, k_gt_4) {
  // k in {5, 6, 7}; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10613
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, k_div_4) {
  // k from 8 through 40 inclusive in steps of 4; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10626
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, n_gt_1) {
  // n in 2..9, k in {1, 6, 11, 16}; m = 8, sparsity 0.0.
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(8).n(n_val).k(k_val).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10641
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, m_lt_8) {
  // m in 1..7, n in {1, 3, 5, 7, 9}, k in {1, 6, 11, 16}; sparsity 0.0.
  for (uint32_t m_val = 1; m_val < 8; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 20; k_val += 5) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10658
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, m_div_8) {
  // m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 6, 11, 16}; sparsity 0.0.
  for (uint32_t m_val = 16; m_val <= 24; m_val += 8) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 20; k_val += 5) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10675
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, m_gt_8) {
  // m in 9..15, n in {1, 3, 5, 7, 9}, k in {1, 6, 11, 16}; sparsity 0.0.
  for (uint32_t m_val = 9; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 20; k_val += 5) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10692
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, output_stride) {
  // n in {1, 3, 5, 7, 9}, k in {1, 6, 11, 16}; m = 16, output_stride = 19, sparsity 0.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).output_stride(19).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10708
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, qmin) {
  // n in {1, 3, 5, 7, 9}, k in {1, 6, 11, 16}; m = 16, sparsity 0.0, qmin = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10724
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, qmax) {
  // n in {1, 3, 5, 7, 9}, k in {1, 6, 11, 16}; m = 16, sparsity 0.0, qmax = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10740
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, half_sparse) {
  // n in {1, 3, 5, 7, 9}, k in {1, 6, 11, 16}; m = 16, sparsity 0.5.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10755
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, zero_weights) {
  // n in {1, 3, 5, 7, 9}, k in {1, 6, 11, 16}; m = 16, sparsity 1.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10770 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10771
10772
10773 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, k_eq_1) {
  // Single invocation: m = 8, n = 1, k = 1, sparsity 0.0.
  SpMMMicrokernelTester tester{};
  tester.mr(8).nr(1).m(8).n(1).k(1).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
10784
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, k_gt_1) {
  // k in 2..9; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10797
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, n_gt_1) {
  // n in 2..9, k in {1, 3, 5}; m = 8, sparsity 0.0.
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(8).n(n_val).k(k_val).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10812
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, m_lt_8) {
  // m in 1..7, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; sparsity 0.0.
  for (uint32_t m_val = 1; m_val < 8; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10829
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, m_div_8) {
  // m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; sparsity 0.0.
  for (uint32_t m_val = 16; m_val <= 24; m_val += 8) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10846
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, m_gt_8) {
  // m in 9..15, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; sparsity 0.0.
  for (uint32_t m_val = 9; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
10863
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, output_stride) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, output_stride = 19, sparsity 0.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).output_stride(19).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10879
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, qmin) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, sparsity 0.0, qmin = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10895
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, qmax) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, sparsity 0.0, qmax = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10911
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, half_sparse) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, sparsity 0.5.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10926
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, zero_weights) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, sparsity 1.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10941 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10942
10943
10944 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, k_eq_1) {
  // Single invocation: m = 8, n = 1, k = 1, sparsity 0.0.
  SpMMMicrokernelTester tester{};
  tester.mr(8).nr(1).m(8).n(1).k(1).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
}
10955
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, k_gt_1) {
  // k in 2..9; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
  }
}
10968
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, n_gt_1) {
  // n in 2..9, k in {1, 3, 5}; m = 8, sparsity 0.0.
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(8).n(n_val).k(k_val).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
10983
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, m_lt_8) {
  // m in 1..7, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; sparsity 0.0.
  for (uint32_t m_val = 1; m_val < 8; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11000
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, m_div_8) {
  // m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; sparsity 0.0.
  for (uint32_t m_val = 16; m_val <= 24; m_val += 8) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11017
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, m_gt_8) {
  // m in 9..15, n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; sparsity 0.0.
  for (uint32_t m_val = 9; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 5; k_val += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11034
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, output_stride) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, output_stride = 19, sparsity 0.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).output_stride(19).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11050
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, qmin) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, sparsity 0.0, qmin = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11066
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, qmax) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, sparsity 0.0, qmax = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11082
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, half_sparse) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, sparsity 0.5.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11097
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, zero_weights) {
  // n in {1, 3, 5, 7, 9}, k in {1, 3, 5}; m = 16, sparsity 1.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11112 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11113
11114
11115 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, k_eq_2) {
  // Single invocation: m = 8, n = 1, k = 2, sparsity 0.0.
  SpMMMicrokernelTester tester{};
  tester.mr(8).nr(1).m(8).n(1).k(2).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
}
11126
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, k_lt_2) {
  // The loop bounds admit only k = 1; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 1; k_val < 2; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
11139
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, k_gt_2) {
  // The loop bounds admit only k = 3; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 3; k_val < 4; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
11152
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, k_div_2) {
  // Even k from 4 through 20 inclusive; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 4; k_val <= 20; k_val += 2) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
11165
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, n_gt_1) {
  // n in 2..9, k in {1, 4, 7, 10}; m = 8, sparsity 0.0.
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(8).n(n_val).k(k_val).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11180
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, m_lt_8) {
  // m in 1..7, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 1; m_val < 8; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11197
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, m_div_8) {
  // m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 16; m_val <= 24; m_val += 8) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11214
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, m_gt_8) {
  // m in 9..15, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 9; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11231
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, output_stride) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, output_stride = 19, sparsity 0.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).output_stride(19).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11247
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, qmin) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 0.0, qmin = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11263
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, qmax) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 0.0, qmax = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11279
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, half_sparse) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 0.5.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11294
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, zero_weights) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 1.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11309 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11310
11311
11312 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, k_eq_2) {
  // Single invocation: m = 8, n = 1, k = 2, sparsity 0.0.
  SpMMMicrokernelTester tester{};
  tester.mr(8).nr(1).m(8).n(1).k(2).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
}
11323
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, k_lt_2) {
  // The loop bounds admit only k = 1; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 1; k_val < 2; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
11336
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, k_gt_2) {
  // The loop bounds admit only k = 3; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 3; k_val < 4; ++k_val) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
11349
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, k_div_2) {
  // Even k from 4 through 20 inclusive; m = 8, n = 1, sparsity 0.0.
  for (size_t k_val = 4; k_val <= 20; k_val += 2) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_val).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
11362
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, n_gt_1) {
  // n in 2..9, k in {1, 4, 7, 10}; m = 8, sparsity 0.0.
  for (uint32_t n_val = 2; n_val < 10; ++n_val) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(8).n(n_val).k(k_val).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11377
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, m_lt_8) {
  // m in 1..7, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 1; m_val < 8; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11394
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, m_div_8) {
  // m in {16, 24}, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 16; m_val <= 24; m_val += 8) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11411
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, m_gt_8) {
  // m in 9..15, n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; sparsity 0.0.
  for (uint32_t m_val = 9; m_val < 16; ++m_val) {
    for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
      for (size_t k_val = 1; k_val <= 10; k_val += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_val).n(n_val).k(k_val).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11428
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, output_stride) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, output_stride = 19, sparsity 0.0.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).output_stride(19).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11444
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, qmin) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 0.0, qmin = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11460
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, qmax) {
  // n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}; m = 16, sparsity 0.0, qmax = 128.
  for (uint32_t n_val = 1; n_val < 10; n_val += 2) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_val).k(k_val).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11476
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(16) and sparsity 0.5f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, half_sparse) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_value).k(k_value).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11491
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(16) and sparsity 1.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, zero_weights) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_value).k(k_value).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11506 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11507
11508
11509 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr(8), nr(1), m(8), n(1), k(4), sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, k_eq_4) {
  SpMMMicrokernelTester tester{};
  tester.mr(8).nr(1).m(8).n(1).k(4).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
              xnn_init_f32_minmax_wasmsimd_params);
}
11520
// Sweeps k = 1..3 with m(8), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, k_lt_4) {
  for (size_t k_value = 1; k_value < 4; ++k_value) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
11533
// Sweeps k = 5..7 with m(8), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, k_gt_4) {
  for (size_t k_value = 5; k_value < 8; ++k_value) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
11546
// Sweeps k over the multiples of 4 from 8 to 40 with m(8), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, k_div_4) {
  for (size_t k_value = 8; k_value <= 40; k_value += 4) {
    SpMMMicrokernelTester tester{};
    tester.mr(8).nr(1).m(8).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
11559
// Sweeps n = 2..9 and k in {1, 6, 11, 16} with m(8) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, n_gt_1) {
  for (uint32_t n_value = 2; n_value < 10; ++n_value) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(8).n(n_value).k(k_value).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11574
// Sweeps m = 1..7 with mr(8), odd n in 1..9 and k in {1, 6, 11, 16}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, m_lt_8) {
  for (uint32_t m_value = 1; m_value < 8; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 20; k_value += 5) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11591
// Sweeps m in {16, 24} with mr(8), odd n in 1..9 and k in {1, 6, 11, 16}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, m_div_8) {
  for (uint32_t m_value = 16; m_value <= 24; m_value += 8) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 20; k_value += 5) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11608
// Sweeps m = 9..15 with mr(8), odd n in 1..9 and k in {1, 6, 11, 16}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, m_gt_8) {
  for (uint32_t m_value = 9; m_value < 16; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 20; k_value += 5) {
        SpMMMicrokernelTester tester{};
        tester.mr(8).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11625
// Runs with m(16) and output_stride(19) for odd n in 1..9 and k in {1, 6, 11, 16} at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, output_stride) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_value).k(k_value).output_stride(19).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11641
// Sweeps odd n in 1..9 and k in {1, 6, 11, 16} with m(16), sparsity 0.0f and qmin(128).
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, qmin) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_value).k(k_value).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11657
// Sweeps odd n in 1..9 and k in {1, 6, 11, 16} with m(16), sparsity 0.0f and qmax(128).
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, qmax) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_value).k(k_value).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11673
// Sweeps odd n in 1..9 and k in {1, 6, 11, 16} with m(16) and sparsity 0.5f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, half_sparse) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_value).k(k_value).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11688
// Sweeps odd n in 1..9 and k in {1, 6, 11, 16} with m(16) and sparsity 1.0f.
TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, zero_weights) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(8).nr(1).m(16).n(n_value).k(k_value).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11703 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11704
11705
11706 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr(16), nr(1), m(16), n(1), k(1), sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, k_eq_1) {
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(1).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
}
11717
// Sweeps k = 2..9 with m(16), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, k_gt_1) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    SpMMMicrokernelTester tester{};
    tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
11730
// Sweeps n = 2..9 and k in {1, 3, 5} with m(16) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, n_gt_1) {
  for (uint32_t n_value = 2; n_value < 10; ++n_value) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(16).n(n_value).k(k_value).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11745
// Sweeps m = 1..15 with mr(16), odd n in 1..9 and k in {1, 3, 5}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, m_lt_16) {
  for (uint32_t m_value = 1; m_value < 16; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11762
// Sweeps m in {32, 48} with mr(16), odd n in 1..9 and k in {1, 3, 5}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, m_div_16) {
  for (uint32_t m_value = 32; m_value <= 48; m_value += 16) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11779
// Sweeps m = 17..31 with mr(16), odd n in 1..9 and k in {1, 3, 5}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, m_gt_16) {
  for (uint32_t m_value = 17; m_value < 32; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11796
// Runs with m(32) and output_stride(37) for odd n in 1..9 and k in {1, 3, 5} at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, output_stride) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).output_stride(37).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11812
// Sweeps odd n in 1..9 and k in {1, 3, 5} with m(32), sparsity 0.0f and qmin(128).
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, qmin) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11828
// Sweeps odd n in 1..9 and k in {1, 3, 5} with m(32), sparsity 0.0f and qmax(128).
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, qmax) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11844
// Sweeps odd n in 1..9 and k in {1, 3, 5} with m(32) and sparsity 0.5f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, half_sparse) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11859
// Sweeps odd n in 1..9 and k in {1, 3, 5} with m(32) and sparsity 1.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, zero_weights) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11874 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11875
11876
11877 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr(16), nr(1), m(16), n(1), k(1), sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(1).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
              xnn_init_f32_minmax_wasmsimd_params);
}
11888
// Sweeps k = 2..9 with m(16), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, k_gt_1) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    SpMMMicrokernelTester tester{};
    tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
11901
// Sweeps n = 2..9 and k in {1, 3, 5} with m(16) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, n_gt_1) {
  for (uint32_t n_value = 2; n_value < 10; ++n_value) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(16).n(n_value).k(k_value).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11916
// Sweeps m = 1..15 with mr(16), odd n in 1..9 and k in {1, 3, 5}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, m_lt_16) {
  for (uint32_t m_value = 1; m_value < 16; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11933
// Sweeps m in {32, 48} with mr(16), odd n in 1..9 and k in {1, 3, 5}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, m_div_16) {
  for (uint32_t m_value = 32; m_value <= 48; m_value += 16) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11950
// Sweeps m = 17..31 with mr(16), odd n in 1..9 and k in {1, 3, 5}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, m_gt_16) {
  for (uint32_t m_value = 17; m_value < 32; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
11967
// Runs with m(32) and output_stride(37) for odd n in 1..9 and k in {1, 3, 5} at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, output_stride) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).output_stride(37).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11983
// Sweeps odd n in 1..9 and k in {1, 3, 5} with m(32), sparsity 0.0f and qmin(128).
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, qmin) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
11999
// Sweeps odd n in 1..9 and k in {1, 3, 5} with m(32), sparsity 0.0f and qmax(128).
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, qmax) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12015
// Sweeps odd n in 1..9 and k in {1, 3, 5} with m(32) and sparsity 0.5f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, half_sparse) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12030
// Sweeps odd n in 1..9 and k in {1, 3, 5} with m(32) and sparsity 1.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, zero_weights) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12045 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12046
12047
12048 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr(16), nr(1), m(16), n(1), k(2), sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, k_eq_2) {
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(2).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
}
12059
// The only k below 2 is 1, so the original one-iteration loop is written as a single run with k = 1.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, k_lt_2) {
  const size_t k_value = 1;
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
}
12072
// The original loop covers only k = 3 (3 <= k < 4), so it is written as a single run.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, k_gt_2) {
  const size_t k_value = 3;
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
}
12085
// Sweeps k over the even values from 4 to 20 with m(16), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, k_div_2) {
  for (size_t k_value = 4; k_value <= 20; k_value += 2) {
    SpMMMicrokernelTester tester{};
    tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
12098
// Sweeps n = 2..9 and k in {1, 4, 7, 10} with m(16) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, n_gt_1) {
  for (uint32_t n_value = 2; n_value < 10; ++n_value) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(16).n(n_value).k(k_value).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12113
// Sweeps m = 1..15 with mr(16), odd n in 1..9 and k in {1, 4, 7, 10}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, m_lt_16) {
  for (uint32_t m_value = 1; m_value < 16; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 10; k_value += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12130
// Sweeps m in {32, 48} with mr(16), odd n in 1..9 and k in {1, 4, 7, 10}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, m_div_16) {
  for (uint32_t m_value = 32; m_value <= 48; m_value += 16) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 10; k_value += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12147
// Sweeps m = 17..31 with mr(16), odd n in 1..9 and k in {1, 4, 7, 10}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, m_gt_16) {
  for (uint32_t m_value = 17; m_value < 32; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 10; k_value += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12164
// Runs with m(32) and output_stride(37) for odd n in 1..9 and k in {1, 4, 7, 10} at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, output_stride) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).output_stride(37).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12180
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(32), sparsity 0.0f and qmin(128).
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, qmin) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12196
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(32), sparsity 0.0f and qmax(128).
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, qmax) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12212
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(32) and sparsity 0.5f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, half_sparse) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12227
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(32) and sparsity 1.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, zero_weights) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12242 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12243
12244
12245 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr(16), nr(1), m(16), n(1), k(2), sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, k_eq_2) {
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(2).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
              xnn_init_f32_minmax_wasmsimd_params);
}
12256
// The only k below 2 is 1, so the original one-iteration loop is written as a single run with k = 1.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, k_lt_2) {
  const size_t k_value = 1;
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
              xnn_init_f32_minmax_wasmsimd_params);
}
12269
// The original loop covers only k = 3 (3 <= k < 4), so it is written as a single run.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, k_gt_2) {
  const size_t k_value = 3;
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
              xnn_init_f32_minmax_wasmsimd_params);
}
12282
// Sweeps k over the even values from 4 to 20 with m(16), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, k_div_2) {
  for (size_t k_value = 4; k_value <= 20; k_value += 2) {
    SpMMMicrokernelTester tester{};
    tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
12295
// Sweeps n = 2..9 and k in {1, 4, 7, 10} with m(16) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, n_gt_1) {
  for (uint32_t n_value = 2; n_value < 10; ++n_value) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(16).n(n_value).k(k_value).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12310
// Sweeps m = 1..15 with mr(16), odd n in 1..9 and k in {1, 4, 7, 10}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, m_lt_16) {
  for (uint32_t m_value = 1; m_value < 16; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 10; k_value += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12327
// Sweeps m in {32, 48} with mr(16), odd n in 1..9 and k in {1, 4, 7, 10}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, m_div_16) {
  for (uint32_t m_value = 32; m_value <= 48; m_value += 16) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 10; k_value += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12344
// Sweeps m = 17..31 with mr(16), odd n in 1..9 and k in {1, 4, 7, 10}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, m_gt_16) {
  for (uint32_t m_value = 17; m_value < 32; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 10; k_value += 3) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12361
// Runs with m(32) and output_stride(37) for odd n in 1..9 and k in {1, 4, 7, 10} at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, output_stride) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).output_stride(37).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12377
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(32), sparsity 0.0f and qmin(128).
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, qmin) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.0f).qmin(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12393
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(32), sparsity 0.0f and qmax(128).
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, qmax) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.0f).qmax(128);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12409
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(32) and sparsity 0.5f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, half_sparse) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(0.5f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12424
// Sweeps odd n in 1..9 and k in {1, 4, 7, 10} with m(32) and sparsity 1.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, zero_weights) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(32).n(n_value).k(k_value).sparsity(1.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12439 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12440
12441
12442 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr(16), nr(1), m(16), n(1), k(4), sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, k_eq_4) {
  SpMMMicrokernelTester tester{};
  tester.mr(16).nr(1).m(16).n(1).k(4).sparsity(0.0f);
  tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
}
12453
// Sweeps k = 1..3 with m(16), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, k_lt_4) {
  for (size_t k_value = 1; k_value < 4; ++k_value) {
    SpMMMicrokernelTester tester{};
    tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
12466
// Sweeps k = 5..7 with m(16), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, k_gt_4) {
  for (size_t k_value = 5; k_value < 8; ++k_value) {
    SpMMMicrokernelTester tester{};
    tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
12479
// Sweeps k over the multiples of 4 from 8 to 40 with m(16), n(1) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, k_div_4) {
  for (size_t k_value = 8; k_value <= 40; k_value += 4) {
    SpMMMicrokernelTester tester{};
    tester.mr(16).nr(1).m(16).n(1).k(k_value).sparsity(0.0f);
    tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
12492
// Sweeps n = 2..9 and k in {1, 6, 11, 16} with m(16) and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, n_gt_1) {
  for (uint32_t n_value = 2; n_value < 10; ++n_value) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      SpMMMicrokernelTester tester{};
      tester.mr(16).nr(1).m(16).n(n_value).k(k_value).sparsity(0.0f);
      tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12507
// Sweeps m = 1..15 with mr(16), odd n in 1..9 and k in {1, 6, 11, 16}, all at sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, m_lt_16) {
  for (uint32_t m_value = 1; m_value < 16; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 2) {
      for (size_t k_value = 1; k_value <= 20; k_value += 5) {
        SpMMMicrokernelTester tester{};
        tester.mr(16).nr(1).m(m_value).n(n_value).k(k_value).sparsity(0.0f);
        tester.Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12524
// Runs the 16x1 wasmsimd_arm_x4 ukernel for m in {32, 48} (multiples of 16),
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, m_div_16) {
  for (uint32_t mc = 32; mc <= 48; mc += 16) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 20; kc += 5) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12541
// Runs the 16x1 wasmsimd_arm_x4 ukernel for m = 17..31 (above 16, below 32),
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, m_gt_16) {
  for (uint32_t mc = 17; mc < 32; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 20; kc += 5) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12558
// Runs the 16x1 wasmsimd_arm_x4 ukernel with output_stride(37) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, output_stride) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12574
// Runs the 16x1 wasmsimd_arm_x4 ukernel with qmin(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, qmin) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12590
// Runs the 16x1 wasmsimd_arm_x4 ukernel with qmax(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, qmax) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12606
// Runs the 16x1 wasmsimd_arm_x4 ukernel with sparsity 0.5f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, half_sparse) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12621
// Runs the 16x1 wasmsimd_arm_x4 ukernel with sparsity 1.0f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, zero_weights) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12636 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12637
12638
12639 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 16x1 wasmsimd_x86 ukernel at k=1 with m=16, n=1
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(16).nr(1)
    .m(16).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
12650
// Runs the 16x1 wasmsimd_x86 ukernel for k = 2..9 with m=16, n=1
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
12663
// Runs the 16x1 wasmsimd_x86 ukernel for n = 2..9 and k in {1, 3, 5}
// with m=16 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, n_gt_1) {
  for (uint32_t nc = 2; nc < 10; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(nc).k(kc)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12678
// Runs the 16x1 wasmsimd_x86 ukernel for m = 1..15 (below 16),
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, m_lt_16) {
  for (uint32_t mc = 1; mc < 16; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 5; kc += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12695
// Runs the 16x1 wasmsimd_x86 ukernel for m in {32, 48} (multiples of 16),
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, m_div_16) {
  for (uint32_t mc = 32; mc <= 48; mc += 16) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 5; kc += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12712
// Runs the 16x1 wasmsimd_x86 ukernel for m = 17..31 (above 16, below 32),
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, m_gt_16) {
  for (uint32_t mc = 17; mc < 32; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 5; kc += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12729
// Runs the 16x1 wasmsimd_x86 ukernel with output_stride(37) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, output_stride) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12745
// Runs the 16x1 wasmsimd_x86 ukernel with qmin(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, qmin) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12761
// Runs the 16x1 wasmsimd_x86 ukernel with qmax(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, qmax) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12777
// Runs the 16x1 wasmsimd_x86 ukernel with sparsity 0.5f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, half_sparse) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12792
// Runs the 16x1 wasmsimd_x86 ukernel with sparsity 1.0f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, zero_weights) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12807 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12808
12809
12810 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 16x1 wasmsimd_x86_pipelined ukernel at k=1 with m=16, n=1
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(16).nr(1)
    .m(16).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
          xnn_init_f32_minmax_wasmsimd_params);
}
12821
// Runs the 16x1 wasmsimd_x86_pipelined ukernel for k = 2..9 with m=16, n=1
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
12834
// Runs the 16x1 wasmsimd_x86_pipelined ukernel for n = 2..9 and k in {1, 3, 5}
// with m=16 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, n_gt_1) {
  for (uint32_t nc = 2; nc < 10; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(nc).k(kc)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12849
// Runs the 16x1 wasmsimd_x86_pipelined ukernel for m = 1..15 (below 16),
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, m_lt_16) {
  for (uint32_t mc = 1; mc < 16; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 5; kc += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12866
// Runs the 16x1 wasmsimd_x86_pipelined ukernel for m in {32, 48} (multiples of 16),
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, m_div_16) {
  for (uint32_t mc = 32; mc <= 48; mc += 16) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 5; kc += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12883
// Runs the 16x1 wasmsimd_x86_pipelined ukernel for m = 17..31 (above 16, below 32),
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, m_gt_16) {
  for (uint32_t mc = 17; mc < 32; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 5; kc += 2) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
12900
// Runs the 16x1 wasmsimd_x86_pipelined ukernel with output_stride(37) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, output_stride) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12916
// Runs the 16x1 wasmsimd_x86_pipelined ukernel with qmin(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, qmin) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12932
// Runs the 16x1 wasmsimd_x86_pipelined ukernel with qmax(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, qmax) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12948
// Runs the 16x1 wasmsimd_x86_pipelined ukernel with sparsity 0.5f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, half_sparse) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12963
// Runs the 16x1 wasmsimd_x86_pipelined ukernel with sparsity 1.0f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 3, 5}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, zero_weights) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
12978 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12979
12980
12981 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 16x1 wasmsimd_x86_pipelined_x2 ukernel at k=2 with m=16, n=1
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, k_eq_2) {
  SpMMMicrokernelTester()
    .mr(16).nr(1)
    .m(16).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
          xnn_init_f32_minmax_wasmsimd_params);
}
12992
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel for every k below 2
// (the loop covers k=1 only) with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, k_lt_2) {
  for (size_t kc = 1; kc < 2; ++kc) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13005
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel for k above 2 and below 4
// (the loop covers k=3 only) with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, k_gt_2) {
  for (size_t kc = 3; kc < 4; ++kc) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13018
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel for k = 4, 6, ..., 20
// (multiples of 2) with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, k_div_2) {
  for (size_t kc = 4; kc <= 20; kc += 2) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13031
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel for n = 2..9 and
// k in {1, 4, 7, 10} with m=16 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, n_gt_1) {
  for (uint32_t nc = 2; nc < 10; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(nc).k(kc)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13046
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel for m = 1..15 (below 16),
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, m_lt_16) {
  for (uint32_t mc = 1; mc < 16; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 10; kc += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13063
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel for m in {32, 48}
// (multiples of 16), n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, m_div_16) {
  for (uint32_t mc = 32; mc <= 48; mc += 16) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 10; kc += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13080
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel for m = 17..31
// (above 16, below 32), n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, m_gt_16) {
  for (uint32_t mc = 17; mc < 32; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 10; kc += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13097
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel with output_stride(37) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, output_stride) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13113
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel with qmin(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, qmin) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13129
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel with qmax(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, qmax) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13145
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel with sparsity 0.5f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, half_sparse) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13160
// Runs the 16x1 wasmsimd_x86_pipelined_x2 ukernel with sparsity 1.0f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, zero_weights) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13175 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
13176
13177
13178 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 16x1 wasmsimd_x86_x2 ukernel at k=2 with m=16, n=1
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, k_eq_2) {
  SpMMMicrokernelTester()
    .mr(16).nr(1)
    .m(16).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
          xnn_init_f32_minmax_wasmsimd_params);
}
13189
// Runs the 16x1 wasmsimd_x86_x2 ukernel for every k below 2
// (the loop covers k=1 only) with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, k_lt_2) {
  for (size_t kc = 1; kc < 2; ++kc) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13202
// Runs the 16x1 wasmsimd_x86_x2 ukernel for k above 2 and below 4
// (the loop covers k=3 only) with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, k_gt_2) {
  for (size_t kc = 3; kc < 4; ++kc) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13215
// Runs the 16x1 wasmsimd_x86_x2 ukernel for k = 4, 6, ..., 20
// (multiples of 2) with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, k_div_2) {
  for (size_t kc = 4; kc <= 20; kc += 2) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13228
// Runs the 16x1 wasmsimd_x86_x2 ukernel for n = 2..9 and k in {1, 4, 7, 10}
// with m=16 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, n_gt_1) {
  for (uint32_t nc = 2; nc < 10; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(nc).k(kc)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13243
// Runs the 16x1 wasmsimd_x86_x2 ukernel for m = 1..15 (below 16),
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, m_lt_16) {
  for (uint32_t mc = 1; mc < 16; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 10; kc += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13260
// Runs the 16x1 wasmsimd_x86_x2 ukernel for m in {32, 48} (multiples of 16),
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, m_div_16) {
  for (uint32_t mc = 32; mc <= 48; mc += 16) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 10; kc += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13277
// Runs the 16x1 wasmsimd_x86_x2 ukernel for m = 17..31 (above 16, below 32),
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, m_gt_16) {
  for (uint32_t mc = 17; mc < 32; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 10; kc += 3) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13294
// Runs the 16x1 wasmsimd_x86_x2 ukernel with output_stride(37) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, output_stride) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13310
// Runs the 16x1 wasmsimd_x86_x2 ukernel with qmin(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, qmin) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13326
// Runs the 16x1 wasmsimd_x86_x2 ukernel with qmax(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, qmax) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13342
// Runs the 16x1 wasmsimd_x86_x2 ukernel with sparsity 0.5f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, half_sparse) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13357
// Runs the 16x1 wasmsimd_x86_x2 ukernel with sparsity 1.0f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, zero_weights) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13372 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
13373
13374
13375 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 16x1 wasmsimd_x86_x4 ukernel at k=4 with m=16, n=1
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, k_eq_4) {
  SpMMMicrokernelTester()
    .mr(16).nr(1)
    .m(16).n(1).k(4)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
          xnn_init_f32_minmax_wasmsimd_params);
}
13386
// Runs the 16x1 wasmsimd_x86_x4 ukernel for k = 1..3 (every value below 4)
// with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13399
// Runs the 16x1 wasmsimd_x86_x4 ukernel for k = 5..7 (above 4, below 8)
// with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13412
// Runs the 16x1 wasmsimd_x86_x4 ukernel for k = 8, 12, ..., 40 (multiples of 4)
// with m=16, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    SpMMMicrokernelTester()
      .mr(16).nr(1)
      .m(16).n(1).k(kc)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
13425
// Runs the 16x1 wasmsimd_x86_x4 ukernel for n = 2..9 and k in {1, 6, 11, 16}
// with m=16 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, n_gt_1) {
  for (uint32_t nc = 2; nc < 10; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(16).n(nc).k(kc)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13440
// Runs the 16x1 wasmsimd_x86_x4 ukernel for m = 1..15 (below 16),
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, m_lt_16) {
  for (uint32_t mc = 1; mc < 16; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 20; kc += 5) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13457
// Runs the 16x1 wasmsimd_x86_x4 ukernel for m in {32, 48} (multiples of 16),
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, m_div_16) {
  for (uint32_t mc = 32; mc <= 48; mc += 16) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 20; kc += 5) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13474
// Runs the 16x1 wasmsimd_x86_x4 ukernel for m = 17..31 (above 16, below 32),
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, m_gt_16) {
  for (uint32_t mc = 17; mc < 32; ++mc) {
    for (uint32_t nc = 1; nc < 10; nc += 2) {
      for (size_t kc = 1; kc <= 20; kc += 5) {
        SpMMMicrokernelTester()
          .mr(16).nr(1)
          .m(mc).n(nc).k(kc)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13491
// Runs the 16x1 wasmsimd_x86_x4 ukernel with output_stride(37) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, output_stride) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .output_stride(37)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13507
// Runs the 16x1 wasmsimd_x86_x4 ukernel with qmin(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, qmin) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13523
// Runs the 16x1 wasmsimd_x86_x4 ukernel with qmax(128) at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, qmax) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13539
// Runs the 16x1 wasmsimd_x86_x4 ukernel with sparsity 0.5f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, half_sparse) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13554
// Runs the 16x1 wasmsimd_x86_x4 ukernel with sparsity 1.0f at m=32,
// n in {1, 3, 5, 7, 9} and k in {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, zero_weights) {
  for (uint32_t nc = 1; nc < 10; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      SpMMMicrokernelTester()
        .mr(16).nr(1)
        .m(32).n(nc).k(kc)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13569 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
13570
13571
13572 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 32x1 wasmsimd_arm ukernel at k=1 with m=32, n=1
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
13583
// k covers 2..9; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, k_gt_1) {
  for (size_t cur_k = 2; cur_k <= 9; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
13596
// n covers 2..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 32, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, n_gt_1) {
  for (uint32_t cur_n = 2; cur_n <= 9; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(32).n(cur_n).k(cur_k).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13611
// m covers 1..31 (every value below mr = 32), n the odd values 1..9, k {1, 3, 5};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, m_lt_32) {
  for (uint32_t cur_m = 1; cur_m <= 31; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13628
// m takes the values 64 and 96 (multiples of mr = 32), n the odd values 1..9, k {1, 3, 5};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, m_div_32) {
  for (uint32_t cur_m = 64; cur_m < 97; cur_m += 32) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13645
// m covers 33..63, n the odd values 1..9, k {1, 3, 5}; mr = 32, nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, m_gt_32) {
  for (uint32_t cur_m = 33; cur_m <= 63; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13662
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// output_stride set to 67, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, output_stride) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).output_stride(67).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13678
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmin set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, qmin) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13694
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmax set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, qmax) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13710
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64, sparsity 0.5f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, half_sparse) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13725
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64, sparsity 1.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, zero_weights) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13740 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
13741
13742
13743 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr = 32, nr = 1, m = 32, n = 1, k = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(1).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
}
13754
// k covers 2..9; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, k_gt_1) {
  for (size_t cur_k = 2; cur_k <= 9; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
  }
}
13767
// n covers 2..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 32, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, n_gt_1) {
  for (uint32_t cur_n = 2; cur_n <= 9; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(32).n(cur_n).k(cur_k).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13782
// m covers 1..31 (every value below mr = 32), n the odd values 1..9, k {1, 3, 5};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, m_lt_32) {
  for (uint32_t cur_m = 1; cur_m <= 31; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13799
// m takes the values 64 and 96 (multiples of mr = 32), n the odd values 1..9, k {1, 3, 5};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, m_div_32) {
  for (uint32_t cur_m = 64; cur_m < 97; cur_m += 32) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13816
// m covers 33..63, n the odd values 1..9, k {1, 3, 5}; mr = 32, nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, m_gt_32) {
  for (uint32_t cur_m = 33; cur_m <= 63; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13833
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// output_stride set to 67, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, output_stride) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).output_stride(67).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13849
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmin set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, qmin) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13865
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmax set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, qmax) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13881
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64, sparsity 0.5f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, half_sparse) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13896
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64, sparsity 1.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, zero_weights) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13911 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
13912
13913
13914 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr = 32, nr = 1, m = 32, n = 1, k = 2, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, k_eq_2) {
  SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(2).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
}
13925
// The loop visits only k = 1 (the single value below 2); mr = 32, nr = 1, m = 32, n = 1,
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, k_lt_2) {
  for (size_t cur_k = 1; cur_k <= 1; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
13938
// The loop visits only k = 3; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, k_gt_2) {
  for (size_t cur_k = 3; cur_k <= 3; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
13951
// k covers the even values 4..20; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, k_div_2) {
  for (size_t cur_k = 4; cur_k < 21; cur_k += 2) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
13964
// n covers 2..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 32, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, n_gt_1) {
  for (uint32_t cur_n = 2; cur_n <= 9; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(32).n(cur_n).k(cur_k).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
13979
// m covers 1..31 (every value below mr = 32), n the odd values 1..9, k {1, 4, 7, 10};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, m_lt_32) {
  for (uint32_t cur_m = 1; cur_m <= 31; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
13996
// m takes the values 64 and 96 (multiples of mr = 32), n the odd values 1..9,
// k {1, 4, 7, 10}; nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, m_div_32) {
  for (uint32_t cur_m = 64; cur_m < 97; cur_m += 32) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14013
// m covers 33..63, n the odd values 1..9, k {1, 4, 7, 10}; mr = 32, nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, m_gt_32) {
  for (uint32_t cur_m = 33; cur_m <= 63; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14030
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// output_stride set to 67, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, output_stride) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).output_stride(67).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14046
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmin set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, qmin) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14062
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmax set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, qmax) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14078
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// sparsity 0.5f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, half_sparse) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14093
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// sparsity 1.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, zero_weights) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14108 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14109
14110
14111 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr = 32, nr = 1, m = 32, n = 1, k = 2, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, k_eq_2) {
  SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(2).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
}
14122
// The loop visits only k = 1 (the single value below 2); mr = 32, nr = 1, m = 32, n = 1,
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, k_lt_2) {
  for (size_t cur_k = 1; cur_k <= 1; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14135
// The loop visits only k = 3; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, k_gt_2) {
  for (size_t cur_k = 3; cur_k <= 3; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14148
// k covers the even values 4..20; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, k_div_2) {
  for (size_t cur_k = 4; cur_k < 21; cur_k += 2) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14161
// n covers 2..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 32, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, n_gt_1) {
  for (uint32_t cur_n = 2; cur_n <= 9; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(32).n(cur_n).k(cur_k).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14176
// m covers 1..31 (every value below mr = 32), n the odd values 1..9, k {1, 4, 7, 10};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, m_lt_32) {
  for (uint32_t cur_m = 1; cur_m <= 31; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14193
// m takes the values 64 and 96 (multiples of mr = 32), n the odd values 1..9,
// k {1, 4, 7, 10}; nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, m_div_32) {
  for (uint32_t cur_m = 64; cur_m < 97; cur_m += 32) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14210
// m covers 33..63, n the odd values 1..9, k {1, 4, 7, 10}; mr = 32, nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, m_gt_32) {
  for (uint32_t cur_m = 33; cur_m <= 63; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14227
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// output_stride set to 67, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, output_stride) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).output_stride(67).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14243
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmin set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, qmin) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14259
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmax set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, qmax) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14275
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// sparsity 0.5f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, half_sparse) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14290
// n covers the odd values 1..9 and k covers {1, 4, 7, 10}; mr = 32, nr = 1, m = 64,
// sparsity 1.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, zero_weights) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 11; cur_k += 3) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14305 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14306
14307
14308 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr = 32, nr = 1, m = 32, n = 1, k = 4, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, k_eq_4) {
  SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(4).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
}
14319
// k covers 1..3; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, k_lt_4) {
  for (size_t cur_k = 1; cur_k <= 3; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14332
// k covers 5..7; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, k_gt_4) {
  for (size_t cur_k = 5; cur_k <= 7; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14345
// k covers the multiples of 4 from 8 to 40; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, k_div_4) {
  for (size_t cur_k = 8; cur_k < 41; cur_k += 4) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14358
// n covers 2..9 and k covers {1, 6, 11, 16}; mr = 32, nr = 1, m = 32, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, n_gt_1) {
  for (uint32_t cur_n = 2; cur_n <= 9; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
      SpMMMicrokernelTester().mr(32).nr(1).m(32).n(cur_n).k(cur_k).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14373
// m covers 1..31 (every value below mr = 32), n the odd values 1..9, k {1, 6, 11, 16};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, m_lt_32) {
  for (uint32_t cur_m = 1; cur_m <= 31; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14390
// m takes the values 64 and 96 (multiples of mr = 32), n the odd values 1..9,
// k {1, 6, 11, 16}; nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, m_div_32) {
  for (uint32_t cur_m = 64; cur_m < 97; cur_m += 32) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14407
// m covers 33..63, n the odd values 1..9, k {1, 6, 11, 16}; mr = 32, nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, m_gt_32) {
  for (uint32_t cur_m = 33; cur_m <= 63; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14424
// n covers the odd values 1..9 and k covers {1, 6, 11, 16}; mr = 32, nr = 1, m = 64,
// output_stride set to 67, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, output_stride) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).output_stride(67).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14440
// n covers the odd values 1..9 and k covers {1, 6, 11, 16}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmin set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, qmin) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14456
// n covers the odd values 1..9 and k covers {1, 6, 11, 16}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmax set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, qmax) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14472
// n covers the odd values 1..9 and k covers {1, 6, 11, 16}; mr = 32, nr = 1, m = 64,
// sparsity 0.5f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, half_sparse) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14487
// n covers the odd values 1..9 and k covers {1, 6, 11, 16}; mr = 32, nr = 1, m = 64,
// sparsity 1.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, zero_weights) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 21; cur_k += 5) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14502 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14503
14504
14505 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: mr = 32, nr = 1, m = 32, n = 1, k = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, k_eq_1) {
  SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(1).sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
14516
// k covers 2..9; mr = 32, nr = 1, m = 32, n = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, k_gt_1) {
  for (size_t cur_k = 2; cur_k <= 9; ++cur_k) {
    SpMMMicrokernelTester().mr(32).nr(1).m(32).n(1).k(cur_k).sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14529
// n covers 2..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 32, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, n_gt_1) {
  for (uint32_t cur_n = 2; cur_n <= 9; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(32).n(cur_n).k(cur_k).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14544
// m covers 1..31 (every value below mr = 32), n the odd values 1..9, k {1, 3, 5};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, m_lt_32) {
  for (uint32_t cur_m = 1; cur_m <= 31; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14561
// m takes the values 64 and 96 (multiples of mr = 32), n the odd values 1..9, k {1, 3, 5};
// nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, m_div_32) {
  for (uint32_t cur_m = 64; cur_m < 97; cur_m += 32) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14578
// m covers 33..63, n the odd values 1..9, k {1, 3, 5}; mr = 32, nr = 1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, m_gt_32) {
  for (uint32_t cur_m = 33; cur_m <= 63; ++cur_m) {
    for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
      for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
        SpMMMicrokernelTester().mr(32).nr(1).m(cur_m).n(cur_n).k(cur_k).sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14595
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// output_stride set to 67, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, output_stride) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).output_stride(67).sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14611
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmin set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, qmin) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14627
// n covers the odd values 1..9 and k covers {1, 3, 5}; mr = 32, nr = 1, m = 64,
// sparsity 0.0f, qmax set to 128.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, qmax) {
  for (uint32_t cur_n = 1; cur_n <= 9; cur_n += 2) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      SpMMMicrokernelTester().mr(32).nr(1).m(64).n(cur_n).k(cur_k).sparsity(0.0f).qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14643
// Fixes m at 64 with sparsity 0.5; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14658
// Fixes m at 64 with sparsity 1.0; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14673 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14674
14675
14676 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 32, n = 1, k = 1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
}
14687
// Sweeps every k in [2, 9] with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14700
// Sweeps every n in [2, 9] and k over {1, 3, 5} with m = 32 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14715
// Sweeps every m in [1, 31], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, m_lt_32) {
  for (uint32_t m_dim = 1; m_dim <= 31; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14732
// Sweeps m over {64, 96}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, m_div_32) {
  for (uint32_t m_dim = 64; m_dim < 97; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14749
// Sweeps every m in [33, 63], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, m_gt_32) {
  for (uint32_t m_dim = 33; m_dim <= 63; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14766
// Fixes m at 64 with output_stride 67; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14782
// Fixes m at 64 with qmin 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14798
// Fixes m at 64 with qmax 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14814
// Fixes m at 64 with sparsity 0.5; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14829
// Fixes m at 64 with sparsity 1.0; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14844 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14845
14846
14847 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 32, n = 1, k = 2, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, k_eq_2) {
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
}
14858
// The loop covers the single value k = 1, with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, k_lt_2) {
  for (size_t k_dim = 1; k_dim <= 1; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14871
// The loop covers the single value k = 3, with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, k_gt_2) {
  for (size_t k_dim = 3; k_dim <= 3; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14884
// Sweeps even k from 4 through 20 with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, k_div_2) {
  for (size_t k_dim = 4; k_dim < 21; k_dim += 2) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
14897
// Sweeps every n in [2, 9] and k over {1, 4, 7, 10} with m = 32 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14912
// Sweeps every m in [1, 31], n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, m_lt_32) {
  for (uint32_t m_dim = 1; m_dim <= 31; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14929
// Sweeps m over {64, 96}, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, m_div_32) {
  for (uint32_t m_dim = 64; m_dim < 97; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14946
// Sweeps every m in [33, 63], n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, m_gt_32) {
  for (uint32_t m_dim = 33; m_dim <= 63; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
14963
// Fixes m at 64 with output_stride 67; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14979
// Fixes m at 64 with qmin 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
14995
// Fixes m at 64 with qmax 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15011
// Fixes m at 64 with sparsity 0.5; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15026
// Fixes m at 64 with sparsity 1.0; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15041 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
15042
15043
15044 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 32, n = 1, k = 2, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, k_eq_2) {
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(2)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
}
15055
// The loop covers the single value k = 1, with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, k_lt_2) {
  for (size_t k_dim = 1; k_dim <= 1; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
15068
// The loop covers the single value k = 3, with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, k_gt_2) {
  for (size_t k_dim = 3; k_dim <= 3; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
15081
// Sweeps even k from 4 through 20 with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, k_div_2) {
  for (size_t k_dim = 4; k_dim < 21; k_dim += 2) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
  }
}
15094
// Sweeps every n in [2, 9] and k over {1, 4, 7, 10} with m = 32 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15109
// Sweeps every m in [1, 31], n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, m_lt_32) {
  for (uint32_t m_dim = 1; m_dim <= 31; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
15126
// Sweeps m over {64, 96}, n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, m_div_32) {
  for (uint32_t m_dim = 64; m_dim < 97; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
15143
// Sweeps every m in [33, 63], n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, m_gt_32) {
  for (uint32_t m_dim = 33; m_dim <= 63; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
15160
// Fixes m at 64 with output_stride 67; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15176
// Fixes m at 64 with qmin 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15192
// Fixes m at 64 with qmax 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15208
// Fixes m at 64 with sparsity 0.5; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15223
// Fixes m at 64 with sparsity 1.0; sweeps n over {1, 3, 5, 7, 9} and k over {1, 4, 7, 10}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 11; k_dim += 3) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15238 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
15239
15240
15241 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 32, n = 1, k = 4, sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, k_eq_4) {
  SpMMMicrokernelTester()
    .mr(32).nr(1)
    .m(32).n(1).k(4)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
}
15252
// Sweeps every k in [1, 3] with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, k_lt_4) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
15265
// Sweeps every k in [5, 7] with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, k_gt_4) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
15278
// Sweeps k over the multiples of 4 from 8 through 40 with m = 32, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, k_div_4) {
  for (size_t k_dim = 8; k_dim < 41; k_dim += 4) {
    SpMMMicrokernelTester()
      .mr(32).nr(1)
      .m(32).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
  }
}
15291
// Sweeps every n in [2, 9] and k over {1, 6, 11, 16} with m = 32 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(32).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15306
// Sweeps every m in [1, 31], n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, m_lt_32) {
  for (uint32_t m_dim = 1; m_dim <= 31; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
15323
// Sweeps m over {64, 96}, n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, m_div_32) {
  for (uint32_t m_dim = 64; m_dim < 97; m_dim += 32) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
15340
// Sweeps every m in [33, 63], n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, m_gt_32) {
  for (uint32_t m_dim = 33; m_dim <= 63; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
        SpMMMicrokernelTester()
          .mr(32).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
15357
// Fixes m at 64 with output_stride 67; sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .output_stride(67)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15373
// Fixes m at 64 with qmin 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15389
// Fixes m at 64 with qmax 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15405
// Fixes m at 64 with sparsity 0.5; sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15420
// Fixes m at 64 with sparsity 1.0; sweeps n over {1, 3, 5, 7, 9} and k over {1, 6, 11, 16}.
TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      SpMMMicrokernelTester()
        .mr(32).nr(1)
        .m(64).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
15435 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
15436
15437
// Single configuration: m = 1, n = 1, k = 1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(1).nr(1)
    .m(1).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
}
15448
// Sweeps every k in [2, 9] with m = 1, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(1).nr(1)
      .m(1).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
15461
// Sweeps every n in [2, 9] and k over {1, 3, 5} with m = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(1).nr(1)
        .m(1).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15476
// NOTE(review): the outer loop starts at m = 1 and requires m < 1, so its range is
// empty and the tester is never invoked; this test passes without exercising the kernel.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, m_lt_1) {
  for (uint32_t m_dim = 1; m_dim < 1; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15493
// Sweeps m over {2, 3}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, m_div_1) {
  for (uint32_t m_dim = 2; m_dim < 4; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15510
// NOTE(review): the outer loop starts at m = 2 and requires m < 2, so its range is
// empty and the tester is never invoked; this test passes without exercising the kernel.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, m_gt_1) {
  for (uint32_t m_dim = 2; m_dim < 2; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15527
// Fixes m at 2 with output_stride 5; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(1).nr(1)
        .m(2).n(n_dim).k(k_dim)
        .output_stride(5)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15543
// Fixes m at 2 with qmin 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, qmin) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(1).nr(1)
        .m(2).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmin(128)
        .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15559
// Fixes m at 2 with qmax 128; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, qmax) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(1).nr(1)
        .m(2).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .qmax(128)
        .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15575
// Fixes m at 2 with sparsity 0.5; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, half_sparse) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(1).nr(1)
        .m(2).n(n_dim).k(k_dim)
        .sparsity(0.5f)
        .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15590
// Fixes m at 2 with sparsity 1.0; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR, zero_weights) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(1).nr(1)
        .m(2).n(n_dim).k(k_dim)
        .sparsity(1.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15605
// Single configuration: m = 1, n = 1, k = 1, sparsity 0.0.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
    .mr(1).nr(1)
    .m(1).n(1).k(1)
    .sparsity(0.0f)
    .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, xnn_init_f32_minmax_scalar_params);
}
15616
// Sweeps every k in [2, 9] with m = 1, n = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    SpMMMicrokernelTester()
      .mr(1).nr(1)
      .m(1).n(1).k(k_dim)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, xnn_init_f32_minmax_scalar_params);
  }
}
15629
// Sweeps every n in [2, 9] and k over {1, 3, 5} with m = 1 and sparsity 0.0.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, n_gt_1) {
  for (uint32_t n_dim = 2; n_dim <= 9; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(1).nr(1)
        .m(1).n(n_dim).k(k_dim)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15644
// NOTE(review): the outer loop starts at m = 1 and requires m < 1, so its range is
// empty and the tester is never invoked; this test passes without exercising the kernel.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, m_lt_1) {
  for (uint32_t m_dim = 1; m_dim < 1; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15661
// Sweeps m over {2, 3}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with sparsity 0.0.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, m_div_1) {
  for (uint32_t m_dim = 2; m_dim < 4; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15678
// NOTE(review): the outer loop starts at m = 2 and requires m < 2, so its range is
// empty and the tester is never invoked; this test passes without exercising the kernel.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, m_gt_1) {
  for (uint32_t m_dim = 2; m_dim < 2; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
      for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
        SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15695
// Fixes m at 2 with output_stride 5; sweeps n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, output_stride) {
  for (uint32_t n_dim = 1; n_dim <= 9; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      SpMMMicrokernelTester()
        .mr(1).nr(1)
        .m(2).n(n_dim).k(k_dim)
        .output_stride(5)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15711
// Sets qmin(128) with m=2, mr=1, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, qmin) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(2).n(n).k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15727
// Sets qmax(128) with m=2, mr=1, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, qmax) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(2).n(n).k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15743
// Uses sparsity 0.5f with m=2, mr=1, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, half_sparse) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(2).n(n).k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15758
// Uses sparsity 1.0f with m=2, mr=1, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, zero_weights) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(1).nr(1)
          .m(2).n(n).k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15773
// Single configuration: mr=2, nr=1, m=2, n=1, k=1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(2).nr(1)
      .m(2).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
            xnn_init_f32_minmax_scalar_params);
}
15784
// Sweeps k over [2, 9] with mr=2, nr=1, m=2, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, k_gt_1) {
  for (size_t k = 2; k <= 9; ++k) {
    SpMMMicrokernelTester()
        .mr(2).nr(1)
        .m(2).n(1).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
15797
// Sweeps n over [2, 9] and k over {1, 3, 5} with mr=2, nr=1, m=2 and
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, n_gt_1) {
  for (uint32_t n = 2; n <= 9; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(2).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15812
// Runs m=1 (the only value below mr=2) with n over {1, 3, 5, 7, 9},
// k over {1, 3, 5}, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, m_lt_2) {
  for (uint32_t m = 1; m <= 1; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(2).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15829
// Sweeps m over {4, 6}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=2, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, m_div_2) {
  for (uint32_t m = 4; m <= 6; m += 2) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(2).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15846
// Runs m=3 (the only value in [3, 4)) with n over {1, 3, 5, 7, 9},
// k over {1, 3, 5}, mr=2, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, m_gt_2) {
  for (uint32_t m = 3; m <= 3; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(2).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15863
// Uses output_stride(7) with m=4, mr=2, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, output_stride) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .output_stride(7)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15879
// Sets qmin(128) with m=4, mr=2, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, qmin) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15895
// Sets qmax(128) with m=4, mr=2, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, qmax) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15911
// Uses sparsity 0.5f with m=4, mr=2, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, half_sparse) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15926
// Uses sparsity 1.0f with m=4, mr=2, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR, zero_weights) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15941
// Single configuration: mr=2, nr=1, m=2, n=1, k=1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(2).nr(1)
      .m(2).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
            xnn_init_f32_minmax_scalar_params);
}
15952
// Sweeps k over [2, 9] with mr=2, nr=1, m=2, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, k_gt_1) {
  for (size_t k = 2; k <= 9; ++k) {
    SpMMMicrokernelTester()
        .mr(2).nr(1)
        .m(2).n(1).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
              xnn_init_f32_minmax_scalar_params);
  }
}
15965
// Sweeps n over [2, 9] and k over {1, 3, 5} with mr=2, nr=1, m=2 and
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, n_gt_1) {
  for (uint32_t n = 2; n <= 9; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(2).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
15980
// Runs m=1 (the only value below mr=2) with n over {1, 3, 5, 7, 9},
// k over {1, 3, 5}, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, m_lt_2) {
  for (uint32_t m = 1; m <= 1; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(2).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15997
// Sweeps m over {4, 6}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=2, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, m_div_2) {
  for (uint32_t m = 4; m <= 6; m += 2) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(2).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16014
// Runs m=3 (the only value in [3, 4)) with n over {1, 3, 5, 7, 9},
// k over {1, 3, 5}, mr=2, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, m_gt_2) {
  for (uint32_t m = 3; m <= 3; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(2).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16031
// Uses output_stride(7) with m=4, mr=2, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, output_stride) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .output_stride(7)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16047
// Sets qmin(128) with m=4, mr=2, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, qmin) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16063
// Sets qmax(128) with m=4, mr=2, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, qmax) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16079
// Uses sparsity 0.5f with m=4, mr=2, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, half_sparse) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16094
// Uses sparsity 1.0f with m=4, mr=2, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, zero_weights) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(2).nr(1)
          .m(4).n(n).k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16109
// Single configuration: mr=4, nr=1, m=4, n=1, k=1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
            xnn_init_f32_minmax_scalar_params);
}
16120
// Sweeps k over [2, 9] with mr=4, nr=1, m=4, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, k_gt_1) {
  for (size_t k = 2; k <= 9; ++k) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
16133
// Sweeps n over [2, 9] and k over {1, 3, 5} with mr=4, nr=1, m=4 and
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, n_gt_1) {
  for (uint32_t n = 2; n <= 9; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(4).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16148
// Sweeps m over [1, 3], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=4, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, m_lt_4) {
  for (uint32_t m = 1; m <= 3; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16165
// Sweeps m over {8, 12}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=4, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, m_div_4) {
  for (uint32_t m = 8; m <= 12; m += 4) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16182
// Sweeps m over [5, 7], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=4, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, m_gt_4) {
  for (uint32_t m = 5; m <= 7; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16199
// Uses output_stride(11) with m=8, mr=4, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, output_stride) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16215
// Sets qmin(128) with m=8, mr=4, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, qmin) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16231
// Sets qmax(128) with m=8, mr=4, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, qmax) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16247
// Uses sparsity 0.5f with m=8, mr=4, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, half_sparse) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16262
// Uses sparsity 1.0f with m=8, mr=4, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR, zero_weights) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16277
// Single configuration: mr=4, nr=1, m=4, n=1, k=1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(4).nr(1)
      .m(4).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
            xnn_init_f32_minmax_scalar_params);
}
16288
// Sweeps k over [2, 9] with mr=4, nr=1, m=4, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, k_gt_1) {
  for (size_t k = 2; k <= 9; ++k) {
    SpMMMicrokernelTester()
        .mr(4).nr(1)
        .m(4).n(1).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
              xnn_init_f32_minmax_scalar_params);
  }
}
16301
// Sweeps n over [2, 9] and k over {1, 3, 5} with mr=4, nr=1, m=4 and
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, n_gt_1) {
  for (uint32_t n = 2; n <= 9; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(4).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16316
// Sweeps m over [1, 3], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=4, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, m_lt_4) {
  for (uint32_t m = 1; m <= 3; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16333
// Sweeps m over {8, 12}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=4, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, m_div_4) {
  for (uint32_t m = 8; m <= 12; m += 4) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16350
// Sweeps m over [5, 7], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=4, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, m_gt_4) {
  for (uint32_t m = 5; m <= 7; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(4).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16367
// Uses output_stride(11) with m=8, mr=4, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, output_stride) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16383
// Sets qmin(128) with m=8, mr=4, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, qmin) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16399
// Sets qmax(128) with m=8, mr=4, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, qmax) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16415
// Uses sparsity 0.5f with m=8, mr=4, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, half_sparse) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16430
// Uses sparsity 1.0f with m=8, mr=4, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, zero_weights) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(4).nr(1)
          .m(8).n(n).k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16445
// Single configuration: mr=8, nr=1, m=8, n=1, k=1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
            xnn_init_f32_minmax_scalar_params);
}
16456
// Sweeps k over [2, 9] with mr=8, nr=1, m=8, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, k_gt_1) {
  for (size_t k = 2; k <= 9; ++k) {
    SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(1).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
16469
// Sweeps n over [2, 9] and k over {1, 3, 5} with mr=8, nr=1, m=8 and
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, n_gt_1) {
  for (uint32_t n = 2; n <= 9; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(8).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16484
// Sweeps m over [1, 7], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=8, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, m_lt_8) {
  for (uint32_t m = 1; m <= 7; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16501
// Sweeps m over {16, 24}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=8, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, m_div_8) {
  for (uint32_t m = 16; m <= 24; m += 8) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16518
// Sweeps m over [9, 15], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=8, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, m_gt_8) {
  for (uint32_t m = 9; m <= 15; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16535
// Uses output_stride(19) with m=16, mr=8, nr=1 and sparsity 0.0f while
// sweeping n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, output_stride) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16551
// Sets qmin(128) with m=16, mr=8, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, qmin) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16567
// Sets qmax(128) with m=16, mr=8, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, qmax) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16583
// Uses sparsity 0.5f with m=16, mr=8, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, half_sparse) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16598
// Uses sparsity 1.0f with m=16, mr=8, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR, zero_weights) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16613
// Single configuration: mr=8, nr=1, m=8, n=1, k=1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(8).nr(1)
      .m(8).n(1).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
            xnn_init_f32_minmax_scalar_params);
}
16624
// Sweeps k over [2, 9] with mr=8, nr=1, m=8, n=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, k_gt_1) {
  for (size_t k = 2; k <= 9; ++k) {
    SpMMMicrokernelTester()
        .mr(8).nr(1)
        .m(8).n(1).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
              xnn_init_f32_minmax_scalar_params);
  }
}
16637
// Sweeps n over [2, 9] and k over {1, 3, 5} with mr=8, nr=1, m=8 and
// sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, n_gt_1) {
  for (uint32_t n = 2; n <= 9; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(8).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16652
// Sweeps m over [1, 7], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=8, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, m_lt_8) {
  for (uint32_t m = 1; m <= 7; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16669
// Sweeps m over {16, 24}, n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=8, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, m_div_8) {
  for (uint32_t m = 16; m <= 24; m += 8) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16686
// Sweeps m over [9, 15], n over {1, 3, 5, 7, 9} and k over {1, 3, 5} with
// mr=8, nr=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, m_gt_8) {
  for (uint32_t m = 9; m <= 15; ++m) {
    for (uint32_t n = 1; n <= 9; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(1)
            .m(m).n(n).k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16703
// Uses output_stride(19) with m=16, mr=8, nr=1 and sparsity 0.0f while
// sweeping n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, output_stride) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16719
// Sets qmin(128) with m=16, mr=8, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, qmin) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16735
// Sets qmax(128) with m=16, mr=8, nr=1 and sparsity 0.0f while sweeping
// n over {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, qmax) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16751
// Uses sparsity 0.5f with m=16, mr=8, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, half_sparse) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16766
// Uses sparsity 1.0f with m=16, mr=8, nr=1 while sweeping n over
// {1, 3, 5, 7, 9} and k over {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, zero_weights) {
  for (uint32_t n = 1; n <= 9; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(1)
          .m(16).n(n).k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16781
// Single configuration: mr=8, nr=2, m=8, n=2, k=1, sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(8).nr(2)
      .m(8).n(2).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
            xnn_init_f32_minmax_scalar_params);
}
16792
// Sweeps n over {1, 2} with mr=8, nr=2, m=8, k=1 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; ++n) {
    SpMMMicrokernelTester()
        .mr(8).nr(2)
        .m(8).n(n).k(1)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
16805
// Sweeps k over [2, 9] with mr=8, nr=2, m=8, n=2 and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, k_gt_1) {
  for (size_t k = 2; k <= 9; ++k) {
    SpMMMicrokernelTester()
        .mr(8).nr(2)
        .m(8).n(2).k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
16818
// Sweeps k over [2, 9] and, for each k, n over {1, 2} with mr=8, nr=2, m=8
// and sparsity 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k <= 9; ++k) {
    for (uint32_t n = 1; n <= 2; ++n) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(8).n(n).k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16833
// n_gt_2: n runs from 3 through 9 and k over {1, 3, 5}; m stays at 8 and
// sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, n_gt_2) {
  for (uint32_t n_value = 3; n_value < 10; ++n_value) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(8).n(n_value).k(k_value)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16848
// n_div_2: n takes the values 4 and 6 and k the values {1, 3, 5}; m stays at 8.
// NOTE(review): no .sparsity() call is made here, unlike the neighbouring
// tests, so whatever default SpMMMicrokernelTester uses applies - confirm
// this is intended.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, n_div_2) {
  for (uint32_t n_value = 4; n_value <= 6; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(8).n(n_value).k(k_value)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16862
// m_lt_8: m runs from 1 through 7, n over {1, 4, 7} and k over {1, 3, 5};
// sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, m_lt_8) {
  for (uint32_t m_value = 1; m_value < 8; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 3) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(2)
            .m(m_value).n(n_value).k(k_value)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16879
// m_div_8: m takes the values 16 and 24, n the values {1, 4, 7} and k the
// values {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, m_div_8) {
  for (uint32_t m_value = 16; m_value <= 24; m_value += 8) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 3) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(2)
            .m(m_value).n(n_value).k(k_value)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16896
// m_gt_8: m runs from 9 through 15, n over {1, 4, 7} and k over {1, 3, 5};
// sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, m_gt_8) {
  for (uint32_t m_value = 9; m_value < 16; ++m_value) {
    for (uint32_t n_value = 1; n_value < 10; n_value += 3) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(2)
            .m(m_value).n(n_value).k(k_value)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16913
// output_stride: m fixed at 16 with output_stride set to 19; n takes the
// values {1, 4, 7} and k the values {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, output_stride) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 3) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(n_value).k(k_value)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16929
// qmin: m fixed at 16 with qmin set to 128; n takes the values {1, 4, 7} and
// k the values {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, qmin) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 3) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(n_value).k(k_value)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16945
// qmax: m fixed at 16 with qmax set to 128; n takes the values {1, 4, 7} and
// k the values {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, qmax) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 3) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(n_value).k(k_value)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16961
// half_sparse: sparsity set to 0.5f with m fixed at 16; n takes the values
// {1, 4, 7} and k the values {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, half_sparse) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 3) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(n_value).k(k_value)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16976
// zero_weights: sparsity set to 1.0f with m fixed at 16; n takes the values
// {1, 4, 7} and k the values {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X2__SCALAR, zero_weights) {
  for (uint32_t n_value = 1; n_value < 10; n_value += 3) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(2)
          .m(16).n(n_value).k(k_value)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
16991
// k_eq_1: single run of xnn_f32_spmm_minmax_ukernel_8x4__scalar with
// m=8, n=4, k=1 and sparsity set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, k_eq_1) {
  SpMMMicrokernelTester()
      .mr(8).nr(4)
      .m(8).n(4).k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
            xnn_init_f32_minmax_scalar_params);
}
17002
// k_eq_1_subtile: k fixed at 1 and m at 8 while n runs from 1 through 4;
// sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
    SpMMMicrokernelTester()
        .mr(8).nr(4)
        .m(8).n(n_value).k(1)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
17015
// k_gt_1: k runs from 2 through 9 with m=8, n=4 and sparsity set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, k_gt_1) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    SpMMMicrokernelTester()
        .mr(8).nr(4)
        .m(8).n(4).k(k_value)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
17028
// k_gt_1_subtile: for every k from 2 through 9, n runs from 1 through 4;
// m stays at 8 and sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(8).n(n_value).k(k_value)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
17043
// n_gt_4: n runs from 5 through 9 and k over {1, 3, 5}; m stays at 8 and
// sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, n_gt_4) {
  for (uint32_t n_value = 5; n_value < 10; ++n_value) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(8).n(n_value).k(k_value)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
17058
// n_div_4: n takes the values 8 and 12 and k the values {1, 3, 5}; m stays at 8.
// NOTE(review): no .sparsity() call is made here, unlike the neighbouring
// tests, so whatever default SpMMMicrokernelTester uses applies - confirm
// this is intended.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, n_div_4) {
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(8).n(n_value).k(k_value)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
17072
// m_lt_8: m runs from 1 through 7, n over {1, 6, 11, 16} and k over
// {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, m_lt_8) {
  for (uint32_t m_value = 1; m_value < 8; ++m_value) {
    for (uint32_t n_value = 1; n_value < 20; n_value += 5) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(4)
            .m(m_value).n(n_value).k(k_value)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
17089
// m_div_8: m takes the values 16 and 24, n the values {1, 6, 11, 16} and k
// the values {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, m_div_8) {
  for (uint32_t m_value = 16; m_value <= 24; m_value += 8) {
    for (uint32_t n_value = 1; n_value < 20; n_value += 5) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(4)
            .m(m_value).n(n_value).k(k_value)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
17106
// m_gt_8: m runs from 9 through 15, n over {1, 6, 11, 16} and k over
// {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, m_gt_8) {
  for (uint32_t m_value = 9; m_value < 16; ++m_value) {
    for (uint32_t n_value = 1; n_value < 20; n_value += 5) {
      for (size_t k_value = 1; k_value <= 5; k_value += 2) {
        SpMMMicrokernelTester()
            .mr(8).nr(4)
            .m(m_value).n(n_value).k(k_value)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
17123
// output_stride: m fixed at 16 with output_stride set to 19; n takes the
// values {1, 6, 11, 16} and k the values {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, output_stride) {
  for (uint32_t n_value = 1; n_value < 20; n_value += 5) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(n_value).k(k_value)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
17139
// qmin: m fixed at 16 with qmin set to 128; n takes the values
// {1, 6, 11, 16} and k the values {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, qmin) {
  for (uint32_t n_value = 1; n_value < 20; n_value += 5) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(n_value).k(k_value)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
17155
// qmax: m fixed at 16 with qmax set to 128; n takes the values
// {1, 6, 11, 16} and k the values {1, 3, 5}; sparsity is set to 0.0f.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, qmax) {
  for (uint32_t n_value = 1; n_value < 20; n_value += 5) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(n_value).k(k_value)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
17171
// half_sparse: sparsity set to 0.5f with m fixed at 16; n takes the values
// {1, 6, 11, 16} and k the values {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, half_sparse) {
  for (uint32_t n_value = 1; n_value < 20; n_value += 5) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(n_value).k(k_value)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
17186
// zero_weights: sparsity set to 1.0f with m fixed at 16; n takes the values
// {1, 6, 11, 16} and k the values {1, 3, 5}.
TEST(F32_SPMM_MINMAX_8X4__SCALAR, zero_weights) {
  for (uint32_t n_value = 1; n_value < 20; n_value += 5) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      SpMMMicrokernelTester()
          .mr(8).nr(4)
          .m(16).n(n_value).k(k_value)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}