// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-igemm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Runs the tester once with m=4, n=8, k=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
40
// Runs the tester once with m=4, n=8, k=4 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
54
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=4, one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
72
// Sweeps m in [1, 4] with n=8 and k=4, one iteration per value.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
88
// Sweeps n in [1, 8] with m=4 and k=4, one iteration per value.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
104
// Runs the tester once with m=4, n=8, k=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
117
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=8, one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
135
// Sweeps k in [1, 7] with m=4, n=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
150
// Sweeps k in [1, 7] against every (m, n) with m in [1, 4] and n in [1, 8], one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
170
// Sweeps k in [9, 15] with m=4, n=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
185
// Sweeps k in [9, 15] against every (m, n) with m in [1, 4] and n in [1, 8], one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
205
// Sweeps k over the multiples of 4 from 12 to 40 with m=4, n=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 12; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
220
// Sweeps k over the multiples of 4 from 12 to 40 against every (m, n) with m in [1, 4] and n in [1, 8],
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 12; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
240
// Sweeps n in [9, 15] against k in {1, 6, 11, 16} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
257
// Sweeps n in [9, 15] against k in {1, 6, 11, 16} with m=4 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
275
// Sweeps n in [9, 15], k in {1, 6, 11, 16} and m in [1, 4], one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
295
// Sweeps n in {16, 24} against k in {1, 6, 11, 16} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
312
// Sweeps n in {16, 24} against k in {1, 6, 11, 16} with m=4 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
330
// Sweeps n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 4], one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
350
// Sweeps k in {1, 6, 11, 16} with m=4, n=8 and ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
366
// Sweeps k in {1, 6, 11, 16} against every (m, n) with m in [1, 4] and n in [1, 8], with ks set to 3
// and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
387
// Sweeps n in [9, 15] against k in {1, 6, 11, 16} with m=4 and ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
405
// Sweeps n in {16, 24} against k in {1, 6, 11, 16} with m=4 and ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
423
// Sweeps k in {1, 6, 11, 16} against every (m, n) with m in [1, 4] and n in [1, 8], with cm_stride
// set to 11 and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
444
// Sweeps k in {1, 6, 11, 16} with m=4, n=8, ks set to 3 and a_offset set to 83.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
461
// Sweeps k in {1, 6, 11, 16} against zero_index values 0..3, with m=4, n=8, ks=3 and a_offset=83.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
481
// Runs the tester once with m=4, n=8, k=4 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
495
// Runs the tester once with m=4, n=8, k=4 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
509
// Runs the tester once with m=4, n=8, k=4 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
523 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
524
525
526 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Runs the tester once with m=4, n=8, k=2.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
}
539
// Runs the tester once with m=4, n=8, k=2 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
}
553
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=2, one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
571
// Sweeps m in [1, 4] with n=8 and k=2, one iteration per value.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
587
// Sweeps n in [1, 8] with m=4 and k=2, one iteration per value.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
603
// Runs the tester for each k below 2 (the loop yields only k=1) with m=4, n=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
618
// For each k below 2 (only k=1), sweeps every (m, n) with m in [1, 4] and n in [1, 8],
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
638
// Runs the tester for each k in [3, 4) (the loop yields only k=3) with m=4, n=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
653
// For each k in [3, 4) (only k=3), sweeps every (m, n) with m in [1, 4] and n in [1, 8],
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
673
// Sweeps k over the even values from 4 to 20 with m=4, n=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
688
// Sweeps k over the even values from 4 to 20 against every (m, n) with m in [1, 4] and n in [1, 8],
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
708
// Sweeps n in [9, 15] against k in {1, 4, 7, 10} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
725
// Sweeps n in [9, 15] against k in {1, 4, 7, 10} with m=4 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
743
// Sweeps n in [9, 15], k in {1, 4, 7, 10} and m in [1, 4], one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
763
// Sweeps n in {16, 24} against k in {1, 4, 7, 10} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
780
// Sweeps n in {16, 24} against k in {1, 4, 7, 10} with m=4 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
798
// Sweeps n in {16, 24}, k in {1, 4, 7, 10} and m in [1, 4], one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
818
// Sweeps k in {1, 4, 7, 10} with m=4, n=8 and ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
834
// Sweeps k in {1, 4, 7, 10} against every (m, n) with m in [1, 4] and n in [1, 8], with ks set to 3
// and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
855
// Sweeps n in [9, 15] against k in {1, 4, 7, 10} with m=4 and ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
873
// Sweeps n in {16, 24} against k in {1, 4, 7, 10} with m=4 and ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
891
// Sweeps k in {1, 4, 7, 10} against every (m, n) with m in [1, 4] and n in [1, 8], with cm_stride
// set to 11 and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
912
// Sweeps k in {1, 4, 7, 10} with m=4, n=8, ks set to 3 and a_offset set to 43.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
929
// Sweeps k in {1, 4, 7, 10} against zero_index values 0..3, with m=4, n=8, ks=3 and a_offset=43.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
949
// Runs the tester once with m=4, n=8, k=2 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
}
963
// Runs the tester once with m=4, n=8, k=2 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
}
977
// Runs the tester once with m=4, n=8, k=2 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
}
991 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
992
993
994 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Runs the tester once with m=4, n=8, k=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1007
// Runs the tester once with m=4, n=8, k=4 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1021
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=4, one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1039
// Sweeps m in [1, 4] with n=8 and k=4, one iteration per value.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1055
// Sweeps n in [1, 8] with m=4 and k=4, one iteration per value.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1071
// Runs the tester once with m=4, n=8, k=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1084
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=8, one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1102
// Sweeps k in [1, 7] with m=4, n=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1117
// Sweeps k in [1, 7] against every (m, n) with m in [1, 4] and n in [1, 8], one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1137
// Sweeps k in [9, 15] with m=4, n=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1152
// Sweeps k in [9, 15] against every (m, n) with m in [1, 4] and n in [1, 8], one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1172
// Full 4x8 tile for k = 12, 16, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 12; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1187
// Every m in [1, 4] and n in [1, 8] for k = 12, 16, ..., 40, with iterations
// set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 12; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1207
// m = 4 with n from 9 through 15 (wider than nr = 8), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1224
// m = 4 with n from 9 through 15, for k = 1, 6, 11, 16, with cn_stride set
// to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1242
// n from 9 through 15, k = 1, 6, 11, 16, and every m in [1, 4], with
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1262
// m = 4 with n = 16 and n = 24 (multiples of nr = 8), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1279
// m = 4 with n = 16 and n = 24, for k = 1, 6, 11, 16, with cn_stride set
// to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1297
// n = 16 and n = 24, k = 1, 6, 11, 16, and every m in [1, 4], with iterations
// set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1317
// Full 4x8 tile for k = 1, 6, 11, 16 with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1333
// Every m in [1, 4] and n in [1, 8] for k = 1, 6, 11, 16, with ks set to 3
// and iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1354
// m = 4 with n from 9 through 15, for k = 1, 6, 11, 16, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1372
// m = 4 with n = 16 and n = 24, for k = 1, 6, 11, 16, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1390
// Every m in [1, 4] and n in [1, 8] for k = 1, 6, 11, 16, with cm_stride set
// to 11 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1411
// Full 4x8 tile for k = 1, 6, 11, 16, with ks set to 3 and a_offset set to 83.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1428
// Full 4x8 tile for k = 1, 6, 11, 16, with ks = 3 and a_offset = 83, trying
// each zero_index value from 0 through 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_row = 0; zero_row <= 3; ++zero_row) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(depth)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_row)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1448
// Full 4x8 tile at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1462
// Full 4x8 tile at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1476
// Full 4x8 tile at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1490 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1491
1492
1493 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 1x8 tile (m = 1, n = 8) at k = 8.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1506
// Full 1x8 tile at k = 8 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1520
// Every n in [1, 8] at k = 8, with iterations set to 1. The m loop has a
// single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1538
// n = 8 and k = 8 while looping over m; the loop has a single value (1).
// iterations is set to 1.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1554
// m = 1 and k = 8 for every n in [1, 8], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1570
// Full 1x8 tile (m = 1, n = 8) at k = 16.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1583
// Every n in [1, 8] at k = 16, with iterations set to 1. The m loop has a
// single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1601
// Full 1x8 tile, run once for every k from 1 through 15.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1616
// Every n in [1, 8] for each k from 1 through 15, with iterations set to 1.
// The m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1636
// Full 1x8 tile, run once for every k from 17 through 31.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth <= 31; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1651
// Every n in [1, 8] for each k from 17 through 31, with iterations set to 1.
// The m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth <= 31; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1671
// Full 1x8 tile for k = 24, 32, ..., 80 (multiples of 8).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1686
// Every n in [1, 8] for k = 24, 32, ..., 80, with iterations set to 1. The m
// loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1706
// m = 1 with n from 9 through 15 (wider than nr = 8), for
// k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1723
// m = 1 with n from 9 through 15, for k = 1, 10, 19, 28, 37, with cn_stride
// set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1741
// n from 9 through 15 and k = 1, 10, 19, 28, 37, with iterations set to 1.
// The m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1761
// m = 1 with n = 16 and n = 24 (multiples of nr = 8), for
// k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1778
// m = 1 with n = 16 and n = 24, for k = 1, 10, 19, 28, 37, with cn_stride set
// to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1796
// n = 16 and n = 24 with k = 1, 10, 19, 28, 37, with iterations set to 1. The
// m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1816
// Full 1x8 tile for k = 1, 10, 19, 28, 37 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1832
// Every n in [1, 8] for k = 1, 10, 19, 28, 37, with ks set to 3 and
// iterations set to 1. The m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1853
// m = 1 with n from 9 through 15, for k = 1, 10, 19, 28, 37, with ks set
// to 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1871
// m = 1 with n = 16 and n = 24, for k = 1, 10, 19, 28, 37, with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1889
// Every n in [1, 8] for k = 1, 10, 19, 28, 37, with cm_stride set to 11 and
// iterations set to 1. The m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1910
// Full 1x8 tile for k = 1, 10, 19, 28, 37, with ks set to 3 and a_offset set
// to 43.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1927
// Full 1x8 tile for k = 1, 10, 19, 28, 37, with ks = 3 and a_offset = 43. The
// zero_index loop has a single value (0).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row <= 0; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(depth)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_row)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1947
// Full 1x8 tile at k = 8 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1961
// Full 1x8 tile at k = 8 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1975
// Full 1x8 tile at k = 8 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1989 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1990
1991
1992 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 1x8 tile (m = 1, n = 8) at k = 8.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
2005
// Full 1x8 tile at k = 8 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
2019
// Every n in [1, 8] at k = 8, with iterations set to 1. The m loop has a
// single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2037
// n = 8 and k = 8 while looping over m; the loop has a single value (1).
// iterations is set to 1.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
2053
// m = 1 and k = 8 for every n in [1, 8], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
2069
// Full 1x8 tile (m = 1, n = 8) at k = 16.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
2082
// Every n in [1, 8] at k = 16, with iterations set to 1. The m loop has a
// single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2100
// Full 1x8 tile, run once for every k from 1 through 15.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
2115
// Every n in [1, 8] for each k from 1 through 15, with iterations set to 1.
// The m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2135
// Full 1x8 tile, run once for every k from 17 through 31.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth <= 31; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
2150
// Every n in [1, 8] for each k from 17 through 31, with iterations set to 1.
// The m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth <= 31; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2170
// Full 1x8 tile for k = 24, 32, ..., 80 (multiples of 8).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
2185
// Every n in [1, 8] for k = 24, 32, ..., 80, with iterations set to 1. The m
// loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2205
// m = 1 with n from 9 through 15 (wider than nr = 8), for
// k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2222
// m = 1 with n from 9 through 15, for k = 1, 10, 19, 28, 37, with cn_stride
// set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2240
// n from 9 through 15 and k = 1, 10, 19, 28, 37, with iterations set to 1.
// The m loop has a single value (1).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2260
// Runs the kernel with n = 16 and 24 and k = 1, 10, 19, 28, 37 at m = 1.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2277
// Same n = 16, 24 / k = 1, 10, 19, 28, 37 sweep as n_div_8, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2295
// Subtile variant of the n = 16, 24 sweep: k = 1, 10, 19, 28, 37, m in [1, 1],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2315
// Full 1x8 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
2331
// Subtile sweep with ks set to 3: k = 1, 10, 19, 28, 37, n in [1, 8], m in [1, 1],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2352
// n in [9, 15] against k = 1, 10, 19, 28, 37 at m = 1, with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2370
// n = 16, 24 against k = 1, 10, 19, 28, 37 at m = 1, with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2388
// Subtile sweep with cm_stride set to 11: k = 1, 10, 19, 28, 37, n in [1, 8],
// m in [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2409
// Full 1x8 tile with ks set to 3 and a_offset set to 43, for k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
2426
// Like a_offset (ks = 3, a_offset = 43, k = 1, 10, 19, 28, 37), additionally
// passing each zero_index in [0, 1) to the tester.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(depth)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_row)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2446
// Full 1x8 tile at k = 8 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
2460
// Full 1x8 tile at k = 8 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
2474
// Full 1x8 tile at k = 8 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
2488 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2489
2490
2491 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 1x12 tile at k = 4.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2504
// Full 1x12 tile at k = 4 with cn_stride set to 17.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(4)
      .cn_stride(17)
      .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2518
// Subtile sweep at k = 4: n in [1, 12], m in [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 12; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2536
// Varies only m (over [1, 1]) at n = 12, k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(rows).n(12).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2552
// Varies only n (over [1, 12]) at m = 1, k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 12; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2568
// Full 1x12 tile at k = 8.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2581
// Subtile sweep at k = 8: n in [1, 12], m in [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 12; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2599
// Full 1x12 tile for every k in [1, 7].
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2614
// Subtile sweep for k in [1, 7]: n in [1, 12], m in [1, 1],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2634
// Full 1x12 tile for every k in [9, 15].
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2649
// Subtile sweep for k in [9, 15]: n in [1, 12], m in [1, 1],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2669
// Full 1x12 tile for k = 12, 16, ..., 40.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 12; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2684
// Subtile sweep for k = 12, 16, ..., 40: n in [1, 12], m in [1, 1],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 12; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2704
// Runs the kernel with n in [13, 23] and k = 1, 6, 11, 16 at m = 1.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 13; cols < 24; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2721
// Same n in [13, 23] / k = 1, 6, 11, 16 sweep as n_gt_12, with cn_stride set to 17.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 13; cols < 24; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(17)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2739
// Subtile variant of the n in [13, 23] sweep: k = 1, 6, 11, 16, m in [1, 1],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 13; cols < 24; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2759
// Runs the kernel with n = 24 and 36 and k = 1, 6, 11, 16 at m = 1.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 24; cols <= 36; cols += 12) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2776
// Same n = 24, 36 / k = 1, 6, 11, 16 sweep as n_div_12, with cn_stride set to 17.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 24; cols <= 36; cols += 12) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(17)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2794
// Subtile variant of the n = 24, 36 sweep: k = 1, 6, 11, 16, m in [1, 1],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 24; cols <= 36; cols += 12) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2814
// Full 1x12 tile with ks set to 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2830
// Subtile sweep with ks set to 3: k = 1, 6, 11, 16, n in [1, 12], m in [1, 1],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2851
// n in [13, 23] against k = 1, 6, 11, 16 at m = 1, with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 13; cols < 24; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2869
// n = 24, 36 against k = 1, 6, 11, 16 at m = 1, with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 24; cols <= 36; cols += 12) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2887
// Subtile sweep with cm_stride set to 17: k = 1, 6, 11, 16, n in [1, 12],
// m in [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(17)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2908
// Full 1x12 tile with ks set to 3 and a_offset set to 23, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(depth)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2925
// Like a_offset (ks = 3, a_offset = 23, k = 1, 6, 11, 16), additionally
// passing each zero_index in [0, 1) to the tester.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(12).k(depth)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_row)
          .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2945
// Full 1x12 tile at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2959
// Full 1x12 tile at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2973
// Full 1x12 tile at k = 4 with cm_stride set to 17.
TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(4)
      .cm_stride(17)
      .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2987 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2988
2989
2990 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 4x2 tile at k = 8.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
3003
// Full 4x2 tile at k = 8 with cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(8)
      .cn_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
3017
// Subtile sweep at k = 8: n in [1, 2], m in [1, 4], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3035
// Varies only m (over [1, 4]) at n = 2, k = 8, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(rows).n(2).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
3051
// Varies only n (over [1, 2]) at m = 4, k = 8, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
3067
// Full 4x2 tile at k = 16.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(16)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
3080
// Subtile sweep at k = 16: n in [1, 2], m in [1, 4], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3098
// Full 4x2 tile for every k in [1, 15].
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
3113
// Subtile sweep for k in [1, 15]: n in [1, 2], m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3133
// Full 4x2 tile for every k in [17, 31].
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
3148
// Subtile sweep for k in [17, 31]: n in [1, 2], m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3168
// Full 4x2 tile for k = 24, 32, ..., 80.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
3183
// Subtile sweep for k = 24, 32, ..., 80: n in [1, 2], m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3203
// Runs the kernel with n in [3, 4) (i.e. n = 3) and k = 1, 10, 19, 28, 37 at m = 4.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3220
// Same n = 3 / k = 1, 10, 19, 28, 37 sweep as n_gt_2, with cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3238
// Subtile variant of the n = 3 sweep: k = 1, 10, 19, 28, 37, m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3258
// Runs the kernel with n = 4 and 6 and k = 1, 10, 19, 28, 37 at m = 4.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3275
// Same n = 4, 6 / k = 1, 10, 19, 28, 37 sweep as n_div_2, with cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
3293
// Subtile variant of the n = 4, 6 sweep: k = 1, 10, 19, 28, 37, m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, n_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3313
// Full 4x2 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
3329
// Subtile sweep with ks set to 3: k = 1, 10, 19, 28, 37, n in [1, 2], m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3350
// 4x2 cortex_a75 kernel: m = 4, n = 3 (the only value in [3, 4)),
// k in {1, 10, 19, 28, 37}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3368
// 4x2 cortex_a75 kernel: m = 4, n in {4, 6}, k in {1, 10, 19, 28, 37},
// with ks(3).
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3386
// 4x2 cortex_a75 kernel: k in {1, 10, 19, 28, 37}, n in 1..2, m in 1..4,
// with cm_stride(5) and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3407
// 4x2 cortex_a75 kernel: m = 4, n = 2, k in {1, 10, 19, 28, 37},
// with ks(3) and a_offset(163).
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)  // tile parameters
        .m(4).n(2).k(kc)          // problem size for this iteration
        .ks(3)
        .a_offset(163)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3424
// 4x2 cortex_a75 kernel: m = 4, n = 2, k in {1, 10, 19, 28, 37},
// with ks(3), a_offset(163) and zero_index swept over 0..3.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)  // tile parameters
          .m(4).n(2).k(kc)          // problem size for this iteration
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3444
// 4x2 cortex_a75 kernel: single run at m = 4, n = 2, k = 8 with qmin(128).
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)  // tile parameters
      .m(4).n(2).k(8)           // problem size
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3458
// 4x2 cortex_a75 kernel: single run at m = 4, n = 2, k = 8 with qmax(128).
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)  // tile parameters
      .m(4).n(2).k(8)           // problem size
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3472
// 4x2 cortex_a75 kernel: single run at m = 4, n = 2, k = 8 with cm_stride(5).
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)  // tile parameters
      .m(4).n(2).k(8)           // problem size
      .cm_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3486 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3487
3488
3489 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 4x8 cortex_a55 kernel: single run at m = 4, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(4)           // problem size
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
3502
// 4x8 cortex_a55 kernel: single run at m = 4, n = 8, k = 4 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(4)           // problem size
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
3516
// 4x8 cortex_a55 kernel: k = 4, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(mc).n(nc).k(4)         // problem size for this iteration
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3534
// 4x8 cortex_a55 kernel: n = 8, k = 4, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(mc).n(8).k(4)          // problem size for this iteration
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
3550
// 4x8 cortex_a55 kernel: m = 4, k = 4, n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(nc).k(4)          // problem size for this iteration
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
3566
// 4x8 cortex_a55 kernel: single run at m = 4, n = 8, k = 8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
3579
// 4x8 cortex_a55 kernel: k = 8, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(mc).n(nc).k(8)         // problem size for this iteration
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3597
// 4x8 cortex_a55 kernel: m = 4, n = 8, k in 1..7.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
3612
// 4x8 cortex_a55 kernel: k in 1..7, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3632
// 4x8 cortex_a55 kernel: m = 4, n = 8, k in 9..15.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
3647
// 4x8 cortex_a55 kernel: k in 9..15, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3667
// 4x8 cortex_a55 kernel: m = 4, n = 8, k from 12 to 40 in steps of 4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 12; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
3682
// 4x8 cortex_a55 kernel: k from 12 to 40 in steps of 4, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 12; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3702
// 4x8 cortex_a55 kernel: m = 4, n in 9..15, k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3719
// 4x8 cortex_a55 kernel: m = 4, n in 9..15, k in {1, 6, 11, 16},
// with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3737
// 4x8 cortex_a55 kernel: n in 9..15, k in {1, 6, 11, 16}, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3757
// 4x8 cortex_a55 kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3774
// 4x8 cortex_a55 kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16},
// with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3792
// 4x8 cortex_a55 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3812
// 4x8 cortex_a55 kernel: m = 4, n = 8, k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
3828
// 4x8 cortex_a55 kernel: k in {1, 6, 11, 16}, n in 1..8, m in 1..4,
// with ks(3) and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3849
// 4x8 cortex_a55 kernel: m = 4, n in 9..15, k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3867
// 4x8 cortex_a55 kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16},
// with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3885
// 4x8 cortex_a55 kernel: k in {1, 6, 11, 16}, n in 1..8, m in 1..4,
// with cm_stride(11) and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3906
// 4x8 cortex_a55 kernel: m = 4, n = 8, k in {1, 6, 11, 16},
// with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
3923
// 4x8 cortex_a55 kernel: m = 4, n = 8, k in {1, 6, 11, 16},
// with ks(3), a_offset(83) and zero_index swept over 0..3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(8).k(kc)          // problem size for this iteration
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3943
// 4x8 cortex_a55 kernel: single run at m = 4, n = 8, k = 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(4)           // problem size
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
3957
// 4x8 cortex_a55 kernel: single run at m = 4, n = 8, k = 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(4)           // problem size
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
3971
// 4x8 cortex_a55 kernel: single run at m = 4, n = 8, k = 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(4)           // problem size
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
3985 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3986
3987
3988 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 4x8 cortex_a75 kernel: single run at m = 4, n = 8, k = 8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
4001
// 4x8 cortex_a75 kernel: single run at m = 4, n = 8, k = 8 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
4015
// 4x8 cortex_a75 kernel: k = 8, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(mc).n(nc).k(8)         // problem size for this iteration
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4033
// 4x8 cortex_a75 kernel: n = 8, k = 8, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(mc).n(8).k(8)          // problem size for this iteration
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
4049
// 4x8 cortex_a75 kernel: m = 4, k = 8, n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(nc).k(8)          // problem size for this iteration
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
4065
// 4x8 cortex_a75 kernel: single run at m = 4, n = 8, k = 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)  // tile parameters
      .m(4).n(8).k(16)          // problem size
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
4078
// 4x8 cortex_a75 kernel: k = 16, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(mc).n(nc).k(16)        // problem size for this iteration
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4096
// 4x8 cortex_a75 kernel: m = 4, n = 8, k in 1..15.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
4111
// 4x8 cortex_a75 kernel: k in 1..15, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4131
// 4x8 cortex_a75 kernel: m = 4, n = 8, k in 17..31.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
4146
// 4x8 cortex_a75 kernel: k in 17..31, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4166
// 4x8 cortex_a75 kernel: m = 4, n = 8, k from 24 to 80 in steps of 8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
4181
// 4x8 cortex_a75 kernel: k from 24 to 80 in steps of 8, n in 1..8, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4201
// 4x8 cortex_a75 kernel: m = 4, n in 9..15, k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4218
// 4x8 cortex_a75 kernel: m = 4, n in 9..15, k in {1, 10, 19, 28, 37},
// with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4236
// 4x8 cortex_a75 kernel: n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4256
// 4x8 cortex_a75 kernel: m = 4, n in {16, 24}, k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4273
// 4x8 cortex_a75 kernel: m = 4, n in {16, 24}, k in {1, 10, 19, 28, 37},
// with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4291
// 4x8 cortex_a75 kernel: n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..4,
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4311
// 4x8 cortex_a75 kernel: m = 4, n = 8, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
4327
// 4x8 cortex_a75 kernel: k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4,
// with ks(3) and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4348
// 4x8 cortex_a75 kernel: m = 4, n in 9..15, k in {1, 10, 19, 28, 37},
// with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4366
// 4x8 cortex_a75 kernel: m = 4, n in {16, 24}, k in {1, 10, 19, 28, 37},
// with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size for this iteration
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4384
// 4x8 cortex_a75 kernel: k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4,
// with cm_stride(11) and one iteration per combination.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size for this iteration
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4405
// 4x8 cortex_a75 kernel: m = 4, n = 8, k in {1, 10, 19, 28, 37},
// with ks(3) and a_offset(163).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size for this iteration
        .ks(3)
        .a_offset(163)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
4422
// 4x8 cortex_a75 kernel: m = 4, n = 8, k in {1, 10, 19, 28, 37},
// with ks(3), a_offset(163) and zero_index swept over 0..3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)  // tile parameters
          .m(4).n(8).k(kc)          // problem size for this iteration
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4442
// cortex_a75 kernel: full 4x8 tile at k = 8 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
4456
// cortex_a75 kernel: full 4x8 tile at k = 8 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
4470
// cortex_a75 kernel: full 4x8 tile at k = 8 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
4484 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4485
4486
4487 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// ld64 kernel: full 4x8 tile at k = 2.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}
4500
// ld64 kernel: full 4x8 tile at k = 2 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}
4514
// ld64 kernel: every (m, n) with m in 1..4 and n in 1..8 at k = 2,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4532
// ld64 kernel: m in 1..4 with n = 8 and k = 2, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
4548
// ld64 kernel: n in 1..8 with m = 4 and k = 2, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
4564
// ld64 kernel: full 4x8 tile for every k below 2 (the loop visits k = 1 only).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
4579
// ld64 kernel: every (m, n) with m in 1..4 and n in 1..8 for k below 2
// (k = 1 only), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4599
// ld64 kernel: full 4x8 tile for k in [3, 4) (the loop visits k = 3 only).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
4614
// ld64 kernel: every (m, n) with m in 1..4 and n in 1..8 for k in [3, 4)
// (k = 3 only), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4634
// ld64 kernel: full 4x8 tile for even k from 4 through 20.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
4649
// ld64 kernel: every (m, n) with m in 1..4 and n in 1..8 for even k from
// 4 through 20, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4669
// ld64 kernel: n in 9..15 with m = 4, for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4686
// ld64 kernel: n in 9..15 with m = 4, for k in {1, 4, 7, 10}, with
// cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4704
// ld64 kernel: n in 9..15, k in {1, 4, 7, 10}, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4724
// ld64 kernel: n in {16, 24} with m = 4, for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4741
// ld64 kernel: n in {16, 24} with m = 4, for k in {1, 4, 7, 10}, with
// cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4759
// ld64 kernel: n in {16, 24}, k in {1, 4, 7, 10}, m in 1..4, one
// iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4779
// ld64 kernel: full 4x8 tile for k in {1, 4, 7, 10} with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
4795
// ld64 kernel: every (m, n) with m in 1..4 and n in 1..8 for
// k in {1, 4, 7, 10}, with ks set to 3 and one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4816
// ld64 kernel: n in 9..15 with m = 4, for k in {1, 4, 7, 10}, with ks
// set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4834
// ld64 kernel: n in {16, 24} with m = 4, for k in {1, 4, 7, 10}, with ks
// set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4852
// ld64 kernel: every (m, n) with m in 1..4 and n in 1..8 for
// k in {1, 4, 7, 10}, with cm_stride set to 11 and one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4873
// ld64 kernel: full 4x8 tile for k in {1, 4, 7, 10}, with ks set to 3 and
// a_offset set to 43.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
4890
// ld64 kernel: full 4x8 tile for k in {1, 4, 7, 10}, ks set to 3, a_offset
// set to 43, and zero_index swept over 0..3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
4910
// ld64 kernel: full 4x8 tile at k = 2 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}
4924
// ld64 kernel: full 4x8 tile at k = 2 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}
4938
// ld64 kernel: full 4x8 tile at k = 2 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}
4952 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4953
4954
4955 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// ld128 kernel: full 4x8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}
4968
// ld128 kernel: full 4x8 tile at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}
4982
// ld128 kernel: every (m, n) with m in 1..4 and n in 1..8 at k = 4,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5000
// ld128 kernel: m in 1..4 with n = 8 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
5016
// ld128 kernel: n in 1..8 with m = 4 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
5032
// ld128 kernel: full 4x8 tile for k in 1..3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
5047
// ld128 kernel: every (m, n) with m in 1..4 and n in 1..8 for k in 1..3,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5067
// ld128 kernel: full 4x8 tile for k in 5..7.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
5082
// ld128 kernel: every (m, n) with m in 1..4 and n in 1..8 for k in 5..7,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5102
// ld128 kernel: full 4x8 tile for k = 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
5117
// ld128 kernel: every (m, n) with m in 1..4 and n in 1..8 for
// k = 8, 12, ..., 40 (step 4), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5137
// ld128 kernel: n in 9..15 with m = 4, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5154
// ld128 kernel: n in 9..15 with m = 4, for k in {1, 6, 11, 16}, with
// cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5172
// ld128 kernel: n in 9..15, k in {1, 6, 11, 16}, m in 1..4, one iteration
// each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5192
// ld128 kernel: n in {16, 24} with m = 4, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5209
// ld128 kernel: n in {16, 24} with m = 4, for k in {1, 6, 11, 16}, with
// cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5227
// ld128 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m in 1..4, one
// iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5247
// ld128 kernel: full 4x8 tile for k in {1, 6, 11, 16} with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
5263
// ld128 kernel: every (m, n) with m in 1..4 and n in 1..8 for
// k in {1, 6, 11, 16}, with ks set to 3 and one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5284
// ld128 kernel: n in 9..15 with m = 4, for k in {1, 6, 11, 16}, with ks
// set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5302
// ld128 kernel: n in {16, 24} with m = 4, for k in {1, 6, 11, 16}, with ks
// set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5320
// ld128 kernel: every (m, n) with m in 1..4 and n in 1..8 for
// k in {1, 6, 11, 16}, with cm_stride set to 11 and one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5341
// ld128 kernel: full 4x8 tile for k in {1, 6, 11, 16}, with ks set to 3 and
// a_offset set to 83.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
5358
// ld128 kernel: full 4x8 tile for k in {1, 6, 11, 16}, ks set to 3,
// a_offset set to 83, and zero_index swept over 0..3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5378
// ld128 kernel: full 4x8 tile at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}
5392
// ld128 kernel: full 4x8 tile at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}
5406
// ld128 kernel: full 4x8 tile at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}
5420 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5421
5422
5423 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// prfm_cortex_a75 kernel: full 4x8 tile at k = 8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5436
// prfm_cortex_a75 kernel: full 4x8 tile at k = 8 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5450
// prfm_cortex_a75 kernel: every (m, n) with m in 1..4 and n in 1..8 at
// k = 8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5468
// prfm_cortex_a75 kernel: m in 1..4 with n = 8 and k = 8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5484
// prfm_cortex_a75 kernel: n in 1..8 with m = 4 and k = 8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5500
// prfm_cortex_a75 kernel: full 4x8 tile at k = 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(16)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5513
// prfm_cortex_a75 kernel: every (m, n) with m in 1..4 and n in 1..8 at
// k = 16, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5531
// prfm_cortex_a75 kernel: full 4x8 tile for k in 1..15.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5546
// For each k in 1..15, runs every (n, m) pair with n in 1..8 and m in 1..4;
// iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5566
// Sweeps k over 17..31 with m = 4 and n = 8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5581
// For each k in 17..31, runs every (n, m) pair with n in 1..8 and m in 1..4;
// iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5601
// Sweeps k over 24, 32, ..., 80 (step 8) with m = 4 and n = 8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5616
// For each k in 24, 32, ..., 80, runs every (n, m) pair with n in 1..8 and
// m in 1..4; iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5636
// For each n in 9..15, sweeps k over 1, 10, ..., 37 (step 9) with m = 4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5653
// Same n (9..15) and k (1..40 step 9) sweep as n_gt_8, with cn_stride(11)
// set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5671
// For each n in 9..15 and k in 1..40 (step 9), runs m = 1..4 with
// iterations(1) set for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5691
// For n = 16 and n = 24, sweeps k over 1..40 (step 9) with m = 4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5708
// Same n (16, 24) and k (1..40 step 9) sweep as n_div_8, with cn_stride(11)
// set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5726
// For n = 16 and n = 24 and k in 1..40 (step 9), runs m = 1..4 with
// iterations(1) set for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5746
// Sweeps k over 1..40 (step 9) with m = 4, n = 8 and ks(3) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5762
// For each k in 1..40 (step 9), runs every (n, m) pair with n in 1..8 and
// m in 1..4, with ks(3) and iterations(1) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5783
// For each n in 9..15, sweeps k over 1..40 (step 9) with m = 4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5801
// For n = 16 and n = 24, sweeps k over 1..40 (step 9) with m = 4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5819
// For each k in 1..40 (step 9), runs every (n, m) pair with n in 1..8 and
// m in 1..4, with cm_stride(11) and iterations(1) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5840
// Sweeps k over 1..40 (step 9) with m = 4, n = 8, ks(3) and a_offset(163).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5857
// For each k in 1..40 (step 9), passes zero_index values 0..3 to the tester
// alongside ks(3) and a_offset(163), with m = 4 and n = 8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5877
// Single configuration: m = 4, n = 8, k = 8 with qmin(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
5891
// Single configuration: m = 4, n = 8, k = 8 with qmax(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
5905
// Single configuration: m = 4, n = 8, k = 8 with cm_stride(11) set on the
// tester.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
5919 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5920
5921
5922 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 4, n = 12, k = 4.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
5935
// Single configuration: m = 4, n = 12, k = 4 with cn_stride(17) set on the
// tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(4)
    .cn_stride(17)
    .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
5949
// Every (n, m) pair with n in 1..12 and m in 1..4 at k = 4; iterations(1)
// is set for each configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 12; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5967
// Sweeps m over 1..4 at fixed n = 12 and k = 4, with iterations(1) set for
// every configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(rows).n(12).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
5983
// Sweeps n over 1..12 at fixed m = 4 and k = 4, with iterations(1) set for
// every configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 12; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
5999
// Single configuration: m = 4, n = 12, k = 8.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
6012
// Every (n, m) pair with n in 1..12 and m in 1..4 at k = 8; iterations(1)
// is set for each configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 12; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6030
// Sweeps k over 1..7 with m = 4 and n = 12.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
6045
// For each k in 1..7, runs every (n, m) pair with n in 1..12 and m in 1..4;
// iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6065
// Sweeps k over 9..15 with m = 4 and n = 12.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
6080
// For each k in 9..15, runs every (n, m) pair with n in 1..12 and m in 1..4;
// iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6100
// Sweeps k over 12, 16, ..., 40 (step 4) with m = 4 and n = 12.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 12; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
6115
// For each k in 12, 16, ..., 40, runs every (n, m) pair with n in 1..12 and
// m in 1..4; iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 12; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6135
// For each n in 13..23, sweeps k over 1, 6, 11, 16 (step 5) with m = 4.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 13; cols < 24; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6152
// Same n (13..23) and k (1..20 step 5) sweep as n_gt_12, with cn_stride(17)
// set on the tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 13; cols < 24; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(17)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6170
// For each n in 13..23 and k in 1..20 (step 5), runs m = 1..4 with
// iterations(1) set for each configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 13; cols < 24; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6190
// For n = 24 and n = 36, sweeps k over 1..20 (step 5) with m = 4.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 24; cols <= 36; cols += 12) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6207
// Same n (24, 36) and k (1..20 step 5) sweep as n_div_12, with cn_stride(17)
// set on the tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 24; cols <= 36; cols += 12) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(17)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6225
// For n = 24 and n = 36 and k in 1..20 (step 5), runs m = 1..4 with
// iterations(1) set for each configuration.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 24; cols <= 36; cols += 12) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6245
// Sweeps k over 1..20 (step 5) with m = 4, n = 12 and ks(3) set on the tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(depth)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
6261
// For each k in 1..20 (step 5), runs every (n, m) pair with n in 1..12 and
// m in 1..4, with ks(3) and iterations(1) set on the tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6282
// For each n in 13..23, sweeps k over 1..20 (step 5) with m = 4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 13; cols < 24; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6300
// For n = 24 and n = 36, sweeps k over 1..20 (step 5) with m = 4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 24; cols <= 36; cols += 12) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6318
// For each k in 1..20 (step 5), runs every (n, m) pair with n in 1..12 and
// m in 1..4, with cm_stride(17) and iterations(1) set on the tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 12; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(17)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6339
// Sweeps k over 1..20 (step 5) with m = 4, n = 12, ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
6356
// For each k in 1..20 (step 5), passes zero_index values 0..3 to the tester
// alongside ks(3) and a_offset(83), with m = 4 and n = 12.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(12).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_row)
        .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6376
// Single configuration: m = 4, n = 12, k = 4 with qmin(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
6390
// Single configuration: m = 4, n = 12, k = 4 with qmax(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
6404
// Single configuration: m = 4, n = 12, k = 4 with cm_stride(17) set on the
// tester.
TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(4)
    .cm_stride(17)
    .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
6418 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
6419
6420
6421 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 6, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
6434
// Single configuration: m = 6, n = 8, k = 4 with cn_stride(11) set on the
// tester.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
6448
// Every (n, m) pair with n in 1..8 and m in 1..6 at k = 4; iterations(1)
// is set for each configuration.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6466
// Sweeps m over 1..6 at fixed n = 8 and k = 4, with iterations(1) set for
// every configuration.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
6482
// Sweeps n over 1..8 at fixed m = 6 and k = 4, with iterations(1) set for
// every configuration.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
6498
// Single configuration: m = 6, n = 8, k = 8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
6511
// Every (n, m) pair with n in 1..8 and m in 1..6 at k = 8; iterations(1)
// is set for each configuration.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
6529
// Sweeps k over 1..7 with m = 6 and n = 8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
6544
// For each k in 1..7, runs every (n, m) pair with n in 1..8 and m in 1..6;
// iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6564
// Sweeps k over 9..15 with m = 6 and n = 8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
6579
// For each k in 9..15, runs every (n, m) pair with n in 1..8 and m in 1..6;
// iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6599
// Sweeps k over 12, 16, ..., 40 (step 4) with m = 6 and n = 8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 12; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
6614
// For each k in 12, 16, ..., 40, runs every (n, m) pair with n in 1..8 and
// m in 1..6; iterations(1) is set for each configuration.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 12; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6634
// For every n in [9, 15], runs m=6 with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6651
// Same sweep as n in [9, 15] x k = 1, 6, 11, 16 at m=6, but with cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6669
// For every n in [9, 15] and k = 1, 6, 11, 16, sweeps m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6689
// For n = 16 and n = 24, runs m=6 with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6706
// For n = 16 and n = 24, runs m=6 with k = 1, 6, 11, 16 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6724
// For n = 16 and n = 24 and k = 1, 6, 11, 16, sweeps m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6744
// Runs the full tile (m=6, n=8) with ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
6760
// For k = 1, 6, 11, 16, sweeps n over [1, 8] and m over [1, 6] with ks(3) and iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6781
// For every n in [9, 15], runs m=6 with ks(3) and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6799
// For n = 16 and n = 24, runs m=6 with ks(3) and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6817
// For k = 1, 6, 11, 16, sweeps n over [1, 8] and m over [1, 6] with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6838
// Runs the full tile (m=6, n=8) with ks(3) and a_offset(127) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
6855
// For k = 1, 6, 11, 16, tries each zero_index in [0, 5] on the full tile with ks(3) and a_offset(127).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6875
// Single run of the full tile (m=6, n=8, k=4) with qmin(128).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
6889
// Single run of the full tile (m=6, n=8, k=4) with qmax(128).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
6903
// Single run of the full tile (m=6, n=8, k=4) with cm_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
6917 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
6918
6919
6920 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the full tile (m=6, n=8) with k=8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
6933
// Single run of the full tile (m=6, n=8, k=8) with cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
6947
// With k=8, sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6965
// With n=8 and k=8, sweeps m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
6981
// With m=6 and k=8, sweeps n over [1, 8] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
6997
// Single run of the full tile (m=6, n=8) with k=16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(16)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
7010
// With k=16, sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7028
// Runs the full tile (m=6, n=8) once for every k in [1, 15].
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
7043
// For every k in [1, 15], sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7063
// Runs the full tile (m=6, n=8) once for every k in [17, 31].
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
7078
// For every k in [17, 31], sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7098
// Runs the full tile (m=6, n=8) for k = 24, 32, ..., 80 (step 8).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
7113
// For k = 24, 32, ..., 80, sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7133
// For every n in [9, 15], runs m=6 with k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7150
// For every n in [9, 15], runs m=6 with k = 1, 10, 19, 28, 37 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7168
// For every n in [9, 15] and k = 1, 10, 19, 28, 37, sweeps m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7188
// For n = 16 and n = 24, runs m=6 with k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7205
// For n = 16 and n = 24, runs m=6 with k = 1, 10, 19, 28, 37 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7223
// For n = 16 and n = 24 and k = 1, 10, 19, 28, 37, sweeps m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7243
// Runs the full tile (m=6, n=8) with ks(3) for k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
7259
// For k = 1, 10, 19, 28, 37, sweeps n over [1, 8] and m over [1, 6] with ks(3) and iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7280
// For every n in [9, 15], runs m=6 with ks(3) and k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7298
// For n = 16 and n = 24, runs m=6 with ks(3) and k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7316
// For k = 1, 10, 19, 28, 37, sweeps n over [1, 8] and m over [1, 6] with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7337
// Runs the full tile (m=6, n=8) with ks(3) and a_offset(251) for k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(251)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
7354
// For k = 1, 10, 19, 28, 37, tries each zero_index in [0, 5] on the full tile with ks(3) and a_offset(251).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(251)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7374
// Single run of the full tile (m=6, n=8, k=8) with qmin(128).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
7388
// Single run of the full tile (m=6, n=8, k=8) with qmax(128).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
7402
// Single run of the full tile (m=6, n=8, k=8) with cm_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
7416 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
7417
7418
7419 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the full tile (m=6, n=8) with k=4.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
7432
// Single run of the full tile (m=6, n=8, k=4) with cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
7446
// With k=4, sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7464
// With n=8 and k=4, sweeps m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
7480
// With m=6 and k=4, sweeps n over [1, 8] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
7496
// Single run of the full tile (m=6, n=8) with k=8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
7509
// With k=8, sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7527
// Runs the full tile (m=6, n=8) once for every k in [1, 7].
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
7542
// For every k in [1, 7], sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7562
// Runs the full tile (m=6, n=8) once for every k in [9, 15].
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
7577
// For every k in [9, 15], sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7597
// Runs the full tile (m=6, n=8) for k = 12, 16, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 12; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
7612
// For k = 12, 16, ..., 40, sweeps n over [1, 8] and m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 12; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7632
// For every n in [9, 15], runs m=6 with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7649
// For every n in [9, 15], runs m=6 with k = 1, 6, 11, 16 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7667
// For every n in [9, 15] and k = 1, 6, 11, 16, sweeps m over [1, 6] with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7687
// For n = 16 and n = 24, runs m=6 with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7704
// For n = 16 and n = 24, runs m=6 with k = 1, 6, 11, 16 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7722
// n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7742
// k in {1, 6, 11, 16} on a 6x8 problem with ks set to 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
7758
// k in {1, 6, 11, 16}, n in 1..8, m in 1..6; ks set to 3, iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7779
// n in 9..15, k in {1, 6, 11, 16}; m fixed at 6, ks set to 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7797
// n in {16, 24}, k in {1, 6, 11, 16}; m fixed at 6, ks set to 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7815
// k in {1, 6, 11, 16}, n in 1..8, m in 1..6; cm_stride set to 11, iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7836
// k in {1, 6, 11, 16} on a 6x8 problem with ks set to 3 and a_offset set to 127.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
7853
// k in {1, 6, 11, 16}, zero_index in 0..5; ks set to 3, a_offset set to 127.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; ++mz) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7873
// Single 6x8 problem with k = 4 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
7887
// Single 6x8 problem with k = 4 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
7901
// Single 6x8 problem with k = 4 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
7915 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
7916
7917
7918 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 problem with k = 2.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
7931
// Single 1x8 problem with k = 2 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
7945
// k = 2 with n in 1..8 and m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m).n(n).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7963
// k = 2, n = 8 with m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
7979
// k = 2, m = 1 with n in 1..8; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
7995
// k below 2 (only k = 1) on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8010
// k below 2 (only k = 1), n in 1..8, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8030
// k in [3, 4) (only k = 3) on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8045
// k in [3, 4) (only k = 3), n in 1..8, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8065
// Even k from 4 through 20 on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8080
// Even k from 4 through 20, n in 1..8, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8100
// n in 9..15, k in {1, 4, 7, 10}; m fixed at 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8117
// n in 9..15, k in {1, 4, 7, 10}; m fixed at 1, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8135
// n in 9..15, k in {1, 4, 7, 10}, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8155
// n in {16, 24}, k in {1, 4, 7, 10}; m fixed at 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8172
// n in {16, 24}, k in {1, 4, 7, 10}; m fixed at 1, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8190
// n in {16, 24}, k in {1, 4, 7, 10}, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8210
// k in {1, 4, 7, 10} on a 1x8 problem with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8226
// k in {1, 4, 7, 10}, n in 1..8, m in 1..1; ks set to 3, iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8247
// n in 9..15, k in {1, 4, 7, 10}; m fixed at 1, ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8265
// n in {16, 24}, k in {1, 4, 7, 10}; m fixed at 1, ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8283
// k in {1, 4, 7, 10}, n in 1..8, m in 1..1; cm_stride set to 11, iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8304
// k in {1, 4, 7, 10} on a 1x8 problem with ks set to 3 and a_offset set to 13.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8321
// k in {1, 4, 7, 10}, zero_index in 0..0; ks set to 3, a_offset set to 13.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .a_offset(13)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8341
// Single 1x8 problem with k = 2 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
8355
// Single 1x8 problem with k = 2 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
8369
// Single 1x8 problem with k = 2 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
8383 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8384
8385
8386 #if XNN_ARCH_ARM64
// Single 1x8 problem with k = 2.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
8399
// Single 1x8 problem with k = 2 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
8413
// k = 2 with n in 1..8 and m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m).n(n).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8431
// k = 2, n = 8 with m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8447
// k = 2, m = 1 with n in 1..8; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8463
// k below 2 (only k = 1) on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8478
// k below 2 (only k = 1), n in 1..8, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8498
// k in [3, 4) (only k = 3) on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8513
// k in [3, 4) (only k = 3), n in 1..8, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8533
// Even k from 4 through 20 on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8548
// Even k from 4 through 20, n in 1..8, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8568
// n in 9..15, k in {1, 4, 7, 10}; m fixed at 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8585
// n in 9..15, k in {1, 4, 7, 10}; m fixed at 1, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8603
// n in 9..15, k in {1, 4, 7, 10}, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8623
// n in {16, 24}, k in {1, 4, 7, 10}; m fixed at 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8640
// n in {16, 24}, k in {1, 4, 7, 10}; m fixed at 1, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8658
// n in {16, 24}, k in {1, 4, 7, 10}, m in 1..1; iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8678
// k in {1, 4, 7, 10} on a 1x8 problem with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8694
// k in {1, 4, 7, 10}, n in 1..8, m in 1..1; ks set to 3, iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8715
// n in 9..15, k in {1, 4, 7, 10}; m fixed at 1, ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8733
// n in {16, 24}, k in {1, 4, 7, 10}; m fixed at 1, ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8751
// k in {1, 4, 7, 10}, n in 1..8, m in 1..1; cm_stride set to 11, iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8772
// k in {1, 4, 7, 10} on a 1x8 problem with ks set to 3 and a_offset set to 13.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8789
// k in {1, 4, 7, 10}, zero_index in 0..0; ks set to 3, a_offset set to 13.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .a_offset(13)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8809
// Single 1x8 problem with k = 2 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
8823
// Single 1x8 problem with k = 2 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
8837
// Single 1x8 problem with k = 2 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
8851 #endif // XNN_ARCH_ARM64
8852
8853
8854 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 problem with sr = 4 and k = 4.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
8867
// strided_cn: single run on a full 1x8 tile with k=4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
8881
// k_eq_4_subtile: k=4, sweeping n over [1, 8] (outer) and m over [1, 1]
// (inner), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8899
// k_eq_4_subtile_m: k=4 and n=8, sweeping m over [1, 1], one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
8915
// k_eq_4_subtile_n: k=4 and m=1, sweeping n over [1, 8], one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
8931
// k_lt_4: full 1x8 tile, sweeping k over [1, 4).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
8946
// k_lt_4_subtile: sweeps k over [1, 4), n over [1, 8] and m over [1, 1]
// (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8966
// k_gt_4: full 1x8 tile, sweeping k over [5, 8).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
8981
// k_gt_4_subtile: sweeps k over [5, 8), n over [1, 8] and m over [1, 1]
// (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9001
// k_div_4: full 1x8 tile, sweeping k over 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
9016
// k_div_4_subtile: sweeps k over 8, 12, ..., 40, n over [1, 8] and m over
// [1, 1] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9036
// n_gt_8: m=1, sweeping n over [9, 16) (outer) and k over {1, 6, 11, 16}
// (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9053
// n_gt_8_strided_cn: m=1 with cn_stride(11), sweeping n over [9, 16) (outer)
// and k over {1, 6, 11, 16} (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9071
// n_gt_8_subtile: sweeps n over [9, 16), k over {1, 6, 11, 16} and m over
// [1, 1] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9091
// n_div_8: m=1, sweeping n over {16, 24} (outer) and k over {1, 6, 11, 16}
// (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9108
// n_div_8_strided_cn: m=1 with cn_stride(11), sweeping n over {16, 24}
// (outer) and k over {1, 6, 11, 16} (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9126
// n_div_8_subtile: sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over
// [1, 1] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9146
// small_kernel: full 1x8 tile with ks=3, sweeping k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc).ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
9162
// small_kernel_subtile: ks=3, sweeping k over {1, 6, 11, 16}, n over [1, 8]
// and m over [1, 1] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc).ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9183
// n_gt_8_small_kernel: m=1 with ks=3, sweeping n over [9, 16) (outer) and
// k over {1, 6, 11, 16} (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc).ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9201
// n_div_8_small_kernel: m=1 with ks=3, sweeping n over {16, 24} (outer) and
// k over {1, 6, 11, 16} (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc).ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9219
// strided_cm_subtile: cm_stride(11), sweeping k over {1, 6, 11, 16}, n over
// [1, 8] and m over [1, 1] (nested in that order), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9240
// a_offset: full 1x8 tile with ks=3 and a_offset=23, sweeping k over
// {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc).ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
9257
// zero: full 1x8 tile with ks=3 and a_offset=23, sweeping k over
// {1, 6, 11, 16} (outer) and zero_index over [0, 1) (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(8).k(kc).ks(3)
          .a_offset(23)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9277
// qmin: single run on a full 1x8 tile with k=4 and qmin(128).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
9291
// qmax: single run on a full 1x8 tile with k=4 and qmax(128).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
9305
// strided_cm: single run on a full 1x8 tile with k=4 and cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
9319 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9320
9321
9322 #if XNN_ARCH_ARM64
// k_eq_2: single run of the 4x2 NEON-FMA lane-ld64 ukernel on a full 4x2
// tile with k=2.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(2)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9335
// strided_cn: single run on a full 4x2 tile with k=2 and cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(2)
      .cn_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9349
// k_eq_2_subtile: k=2, sweeping n over [1, 2] (outer) and m over [1, 4]
// (inner), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(2)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9367
// k_eq_2_subtile_m: k=2 and n=2, sweeping m over [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9383
// k_eq_2_subtile_n: k=2 and m=4, sweeping n over [1, 2], one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9399
// k_lt_2: full 4x2 tile, sweeping k over [1, 2).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9414
// k_lt_2_subtile: sweeps k over [1, 2), n over [1, 2] and m over [1, 4]
// (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9434
// k_gt_2: full 4x2 tile, sweeping k over [3, 4).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9449
// k_gt_2_subtile: sweeps k over [3, 4), n over [1, 2] and m over [1, 4]
// (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9469
// k_div_2: full 4x2 tile, sweeping k over 4, 6, ..., 20.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9484
// k_div_2_subtile: sweeps k over 4, 6, ..., 20, n over [1, 2] and m over
// [1, 4] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9504
// n_gt_2: m=4, sweeping n over [3, 4) (outer) and k over {1, 4, 7, 10}
// (inner).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9521
// n_gt_2_strided_cn: m=4 with cn_stride(5), sweeping n over [3, 4) (outer)
// and k over {1, 4, 7, 10} (inner).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9539
// n_gt_2_subtile: sweeps n over [3, 4), k over {1, 4, 7, 10} and m over
// [1, 4] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9559
// n_div_2: m=4, sweeping n over {4, 6} (outer) and k over {1, 4, 7, 10}
// (inner).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9576
// n_div_2_strided_cn: m=4 with cn_stride(5), sweeping n over {4, 6} (outer)
// and k over {1, 4, 7, 10} (inner).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9594
// n_div_2_subtile: sweeps n over {4, 6}, k over {1, 4, 7, 10} and m over
// [1, 4] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9614
// small_kernel: full 4x2 tile with ks=3, sweeping k over {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc).ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9630
// small_kernel_subtile: ks=3, sweeping k over {1, 4, 7, 10}, n over [1, 2]
// and m over [1, 4] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc).ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9651
// n_gt_2_small_kernel: m=4 with ks=3, sweeping n over [3, 4) (outer) and
// k over {1, 4, 7, 10} (inner).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc).ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9669
// n_div_2_small_kernel: m=4 with ks=3, sweeping n over {4, 6} (outer) and
// k over {1, 4, 7, 10} (inner).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc).ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9687
// strided_cm_subtile: cm_stride(5), sweeping k over {1, 4, 7, 10}, n over
// [1, 2] and m over [1, 4] (nested in that order), one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9708
// a_offset: full 4x2 tile with ks=3 and a_offset=43, sweeping k over
// {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc).ks(3)
        .a_offset(43)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9725
// zero: full 4x2 tile with ks=3 and a_offset=43, sweeping k over
// {1, 4, 7, 10} (outer) and zero_index over [0, 4) (inner).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(kc).ks(3)
          .a_offset(43)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9745
// qmin: single run on a full 4x2 tile with k=2 and qmin(128).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(2)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9759
// qmax: single run on a full 4x2 tile with k=2 and qmax(128).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(2)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9773
// strided_cm: single run on a full 4x2 tile with k=2 and cm_stride(5).
TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(2)
      .cm_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9787 #endif // XNN_ARCH_ARM64
9788
9789
9790 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_2: single run of the 4x8 NEON dup-ld64 ukernel on a full 4x8 tile
// with k=2.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9803
// strided_cn: single run on a full 4x8 tile with k=2 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9817
// k_eq_2_subtile: k=2, sweeping n over [1, 8] (outer) and m over [1, 4]
// (inner), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(2)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9835
// k_eq_2_subtile_m: k=2 and n=8, sweeping m over [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9851
// k_eq_2_subtile_n: k=2 and m=4, sweeping n over [1, 8], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9867
// k_lt_2: full 4x8 tile, sweeping k over [1, 2).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 2; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9882
// k_lt_2_subtile: sweeps k over [1, 2), n over [1, 8] and m over [1, 4]
// (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9902
// k_gt_2: full 4x8 tile, sweeping k over [3, 4).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9917
// k_gt_2_subtile: sweeps k over [3, 4), n over [1, 8] and m over [1, 4]
// (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9937
// k_div_2: full 4x8 tile, sweeping k over 4, 6, ..., 20.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9952
// k_div_2_subtile: sweeps k over 4, 6, ..., 20, n over [1, 8] and m over
// [1, 4] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9972
// n_gt_8: m=4, sweeping n over [9, 16) (outer) and k over {1, 4, 7, 10}
// (inner).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9989
// n_gt_8_strided_cn: m=4 with cn_stride(11), sweeping n over [9, 16) (outer)
// and k over {1, 4, 7, 10} (inner).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10007
// n_gt_8_subtile: sweeps n over [9, 16), k over {1, 4, 7, 10} and m over
// [1, 4] (nested in that order), one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10027
// n_div_8: m=4, sweeping n over {16, 24} (outer) and k over {1, 4, 7, 10}
// (inner).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10044
// n_div_8_strided_cn: m=4 with cn_stride(11), sweeping n over {16, 24}
// (outer) and k over {1, 4, 7, 10} (inner).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10062
// neon_dup_ld64 4x8 IGEMM: n in {16, 24}, k in {1, 4, 7, 10}, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10082
// neon_dup_ld64 4x8 IGEMM: m = 4, n = 8, k in {1, 4, 7, 10}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10098
// neon_dup_ld64 4x8 IGEMM: k in {1, 4, 7, 10}, n = 1..8, m = 1..4, with ks(3), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10119
// neon_dup_ld64 4x8 IGEMM: m = 4, n = 9..15, k in {1, 4, 7, 10}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10137
// neon_dup_ld64 4x8 IGEMM: m = 4, n in {16, 24}, k in {1, 4, 7, 10}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10155
// neon_dup_ld64 4x8 IGEMM: k in {1, 4, 7, 10}, n = 1..8, m = 1..4, with cm_stride(11), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10176
// neon_dup_ld64 4x8 IGEMM: m = 4, n = 8, k in {1, 4, 7, 10}, with ks(3) and a_offset(43).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10193
// neon_dup_ld64 4x8 IGEMM: k in {1, 4, 7, 10}, zero_index = 0..3, with ks(3) and a_offset(43).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(kc);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10213
// neon_dup_ld64 4x8 IGEMM: m = 4, n = 8, k = 2, with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
10227
// neon_dup_ld64 4x8 IGEMM: m = 4, n = 8, k = 2, with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
10241
// neon_dup_ld64 4x8 IGEMM: m = 4, n = 8, k = 2, with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
10255 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10256
10257
10258 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10271
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k = 4, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10285
// neon_dup_ld128 4x8 IGEMM: k = 4, n = 1..8, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10303
// neon_dup_ld128 4x8 IGEMM: k = 4, n = 8, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10319
// neon_dup_ld128 4x8 IGEMM: k = 4, m = 4, n = 1..8, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10335
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k = 1..3.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10350
// neon_dup_ld128 4x8 IGEMM: k = 1..3, n = 1..8, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10370
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k = 5..7.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10385
// neon_dup_ld128 4x8 IGEMM: k = 5..7, n = 1..8, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10405
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k = 8..40 in steps of 4.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10420
// neon_dup_ld128 4x8 IGEMM: k = 8..40 in steps of 4, n = 1..8, m = 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10440
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 9..15, k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10457
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 9..15, k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10475
// neon_dup_ld128 4x8 IGEMM: n = 9..15, k in {1, 6, 11, 16}, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10495
// neon_dup_ld128 4x8 IGEMM: m = 4, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10512
// neon_dup_ld128 4x8 IGEMM: m = 4, n in {16, 24}, k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10530
// neon_dup_ld128 4x8 IGEMM: n in {16, 24}, k in {1, 6, 11, 16}, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10550
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10566
// neon_dup_ld128 4x8 IGEMM: k in {1, 6, 11, 16}, n = 1..8, m = 1..4, with ks(3), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10587
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 9..15, k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10605
// neon_dup_ld128 4x8 IGEMM: m = 4, n in {16, 24}, k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10623
// neon_dup_ld128 4x8 IGEMM: k in {1, 6, 11, 16}, n = 1..8, m = 1..4, with cm_stride(11), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10644
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k in {1, 6, 11, 16}, with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10661
// neon_dup_ld128 4x8 IGEMM: k in {1, 6, 11, 16}, zero_index = 0..3, with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(kc);
      tester.ks(3).a_offset(83).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10681
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k = 4, with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10695
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k = 4, with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10709
// neon_dup_ld128 4x8 IGEMM: m = 4, n = 8, k = 4, with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10723 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10724
10725
10726 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8, k = 2.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10739
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8, k = 2, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10753
// neon_lane_ld64 4x8 IGEMM: k = 2, n = 1..8, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(2);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10771
// neon_lane_ld64 4x8 IGEMM: k = 2, n = 8, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(2);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10787
// neon_lane_ld64 4x8 IGEMM: k = 2, m = 4, n = 1..8, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(2);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10803
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8; the loop covers only k = 1.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 2; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10818
// neon_lane_ld64 4x8 IGEMM: k = 1 only, n = 1..8, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10838
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8; the loop covers only k = 3.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc < 4; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10853
// neon_lane_ld64 4x8 IGEMM: k = 3 only, n = 1..8, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10873
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8, k = 4..20 in steps of 2.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10888
// neon_lane_ld64 4x8 IGEMM: k = 4..20 in steps of 2, n = 1..8, m = 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10908
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 9..15, k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10925
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 9..15, k in {1, 4, 7, 10}, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10943
// neon_lane_ld64 4x8 IGEMM: n = 9..15, k in {1, 4, 7, 10}, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10963
// neon_lane_ld64 4x8 IGEMM: m = 4, n in {16, 24}, k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10980
// neon_lane_ld64 4x8 IGEMM: m = 4, n in {16, 24}, k in {1, 4, 7, 10}, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10998
// neon_lane_ld64 4x8 IGEMM: n in {16, 24}, k in {1, 4, 7, 10}, m = 1..4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11018
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8, k in {1, 4, 7, 10}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11034
// neon_lane_ld64 4x8 IGEMM: k in {1, 4, 7, 10}, n = 1..8, m = 1..4, with ks(3), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11055
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 9..15, k in {1, 4, 7, 10}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11073
// neon_lane_ld64 4x8 IGEMM: m = 4, n in {16, 24}, k in {1, 4, 7, 10}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11091
// neon_lane_ld64 4x8 IGEMM: k in {1, 4, 7, 10}, n = 1..8, m = 1..4, with cm_stride(11), one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11112
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8, k in {1, 4, 7, 10}, with ks(3) and a_offset(43).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11129
// neon_lane_ld64 4x8 IGEMM: k in {1, 4, 7, 10}, zero_index = 0..3, with ks(3) and a_offset(43).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(kc);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11149
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8, k = 2, with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11163
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8, k = 2, with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11177
// neon_lane_ld64 4x8 IGEMM: m = 4, n = 8, k = 2, with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11191 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11192
11193
11194 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// neon_lane_ld128 4x8 IGEMM: m = 4, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
11207
// neon_lane_ld128 4x8 IGEMM: m = 4, n = 8, k = 4, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
11221
// Sweeps n = 1..8 and m = 1..4 at k = 4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11239
// Sweeps m = 1..4 with n = 8 and k = 4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11255
// Sweeps n = 1..8 with m = 4 and k = 4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11271
// Full 4x8 tile for every k below 4 (k = 1..3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11286
// Sweeps k = 1..3, n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11306
// Full 4x8 tile for k = 5..7.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11321
// Sweeps k = 5..7, n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11341
// Full 4x8 tile for k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11356
// Sweeps k = 8, 12, ..., 40 with n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11376
// Sweeps n = 9..15 and k = 1, 6, 11, 16 with m = 4.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11393
// Sweeps n = 9..15 and k = 1, 6, 11, 16 with m = 4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11411
// Sweeps n = 9..15, k = 1, 6, 11, 16 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11431
// Sweeps n = 16, 24 and k = 1, 6, 11, 16 with m = 4.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11448
// Sweeps n = 16, 24 and k = 1, 6, 11, 16 with m = 4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11466
// Sweeps n = 16, 24, k = 1, 6, 11, 16 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11486
// Full 4x8 tile with ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11502
// Sweeps k = 1, 6, 11, 16, n = 1..8 and m = 1..4 with ks(3) and iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11523
// Sweeps n = 9..15 and k = 1, 6, 11, 16 with m = 4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11541
// Sweeps n = 16, 24 and k = 1, 6, 11, 16 with m = 4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11559
// Sweeps k = 1, 6, 11, 16, n = 1..8 and m = 1..4 with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11580
// Full 4x8 tile with ks(3) and a_offset(83) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11597
// Sweeps k = 1, 6, 11, 16 and zero_index mz = 0..3 with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k)
          .ks(3)
          .a_offset(83)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11617
// Full 4x8 tile at k = 4 with qmin(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
11631
// Full 4x8 tile at k = 4 with qmax(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
11645
// Full 4x8 tile at k = 4 with cm_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
11659 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11660
11661
11662 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 4x8 tile at k = 2; no extra tester options are set.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
11675
// Full 4x8 tile at k = 2 with cn_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
11689
// Sweeps n = 1..8 and m = 1..4 at k = 2, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(2)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11707
// Sweeps m = 1..4 with n = 8 and k = 2, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(m).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11723
// Sweeps n = 1..8 with m = 4 and k = 2, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11739
// Full 4x8 tile for every k below 2 (the loop visits only k = 1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11754
// Sweeps k below 2 (only k = 1), n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11774
// Full 4x8 tile for k in [3, 4) (the loop visits only k = 3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11789
// Sweeps k in [3, 4) (only k = 3), n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11809
// Full 4x8 tile for k = 4, 6, ..., 20 (multiples of 2).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11824
// Sweeps k = 4, 6, ..., 20 with n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11844
// Sweeps n = 9..15 and k = 1, 4, 7, 10 with m = 4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11861
// Sweeps n = 9..15 and k = 1, 4, 7, 10 with m = 4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11879
// Sweeps n = 9..15, k = 1, 4, 7, 10 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11899
// Sweeps n = 16, 24 and k = 1, 4, 7, 10 with m = 4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11916
// Sweeps n = 16, 24 and k = 1, 4, 7, 10 with m = 4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11934
// Sweeps n = 16, 24, k = 1, 4, 7, 10 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11954
// Full 4x8 tile with ks(3) for k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11970
// Sweeps k = 1, 4, 7, 10, n = 1..8 and m = 1..4 with ks(3) and iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11991
// Sweeps n = 9..15 and k = 1, 4, 7, 10 with m = 4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12009
// Sweeps n = 16, 24 and k = 1, 4, 7, 10 with m = 4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12027
// Sweeps k = 1, 4, 7, 10, n = 1..8 and m = 1..4 with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12048
// Full 4x8 tile with ks(3) and a_offset(43) for k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12065
// Sweeps k = 1, 4, 7, 10 and zero_index mz = 0..3 with ks(3) and a_offset(43).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k)
          .ks(3)
          .a_offset(43)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12085
// Full 4x8 tile at k = 2 with qmin(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
12099
// Full 4x8 tile at k = 2 with qmax(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
12113
// Full 4x8 tile at k = 2 with cm_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
12127 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12128
12129
12130 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 4x8 tile at k = 4; no extra tester options are set.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
12143
// Full 4x8 tile at k = 4 with cn_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
12157
// Sweeps n = 1..8 and m = 1..4 at k = 4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12175
// Sweeps m = 1..4 with n = 8 and k = 4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
12191
// Sweeps n = 1..8 with m = 4 and k = 4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
12207
// Full 4x8 tile for every k below 4 (k = 1..3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
12222
// Sweeps k = 1..3, n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12242
// Full 4x8 tile for k = 5..7.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
12257
// Sweeps k = 5..7, n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12277
// Full 4x8 tile for k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
12292
// Sweeps k = 8, 12, ..., 40 with n = 1..8 and m = 1..4, each case with iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12312
// Sweeps n = 9..15 and k = 1, 6, 11, 16 with m = 4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12329
// Sweeps n = 9..15 and k = 1, 6, 11, 16 with m = 4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12347
// For n = 9..15, k = 1, 6, 11, 16 and m = 1..4: one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12367
// For n = 16, 24 and k = 1, 6, 11, 16: runs the kernel with m=4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12384
// For n = 16, 24 and k = 1, 6, 11, 16: runs the kernel with m=4 and
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12402
// For n = 16, 24, k = 1, 6, 11, 16 and m = 1..4: one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12422
// For k = 1, 6, 11, 16: runs the kernel at m=4, n=8 with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
12438
// For k = 1, 6, 11, 16, n = 1..8 and m = 1..4: runs with ks(3), one
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12459
// For n = 9..15 and k = 1, 6, 11, 16: runs the kernel with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12477
// For n = 16, 24 and k = 1, 6, 11, 16: runs the kernel with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12495
// For k = 1, 6, 11, 16, n = 1..8 and m = 1..4: runs with cm_stride(11), one
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12516
// For k = 1, 6, 11, 16: runs the kernel at m=4, n=8 with ks(3) and
// a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
12533
// For k = 1, 6, 11, 16 and zero_index = 0..3: runs the kernel at m=4, n=8
// with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12553
// Single run at m=4, n=8, k=4 with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
          xnn_init_f32_minmax_scalar_params);
}
12567
// Single run at m=4, n=8, k=4 with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
          xnn_init_f32_minmax_scalar_params);
}
12581
// Single run at m=4, n=8, k=4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
          xnn_init_f32_minmax_scalar_params);
}
12595 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12596
12597
12598 #if XNN_ARCH_ARM64
// Single run of the lane_ld64 kernel at m=4, n=8, k=2.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
12611
// Single run at m=4, n=8, k=2 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
12625
// With k=2: runs every (n, m) with n in 1..8 and m in 1..4, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12643
// With n=8, k=2: runs m = 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
12659
// With m=4, k=2: runs n = 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
12675
// Runs the kernel at m=4, n=8 for every k in [1, 2), i.e. k=1 only.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
12690
// For k in [1, 2), n = 1..8 and m = 1..4: one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12710
// Runs the kernel at m=4, n=8 for every k in [3, 4), i.e. k=3 only.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
12725
// For k in [3, 4), n = 1..8 and m = 1..4: one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12745
// Runs the kernel at m=4, n=8 for k = 4, 6, ..., 20.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
12760
// For k = 4, 6, ..., 20, n = 1..8 and m = 1..4: one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12780
// For n = 9..15 and k = 1, 4, 7, 10: runs the kernel with m=4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12797
// For n = 9..15 and k = 1, 4, 7, 10: runs the kernel with m=4 and
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12815
// For n = 9..15, k = 1, 4, 7, 10 and m = 1..4: one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12835
// For n = 16, 24 and k = 1, 4, 7, 10: runs the kernel with m=4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12852
// For n = 16, 24 and k = 1, 4, 7, 10: runs the kernel with m=4 and
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12870
// For n = 16, 24, k = 1, 4, 7, 10 and m = 1..4: one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12890
// For k = 1, 4, 7, 10: runs the kernel at m=4, n=8 with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
12906
// For k = 1, 4, 7, 10, n = 1..8 and m = 1..4: runs with ks(3), one
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12927
// For n = 9..15 and k = 1, 4, 7, 10: runs the kernel with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12945
// For n = 16, 24 and k = 1, 4, 7, 10: runs the kernel with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
12963
// For k = 1, 4, 7, 10, n = 1..8 and m = 1..4: runs with cm_stride(11), one
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12984
// For k = 1, 4, 7, 10: runs the kernel at m=4, n=8 with ks(3) and
// a_offset(43).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
13001
// For k = 1, 4, 7, 10 and zero_index = 0..3: runs the kernel at m=4, n=8
// with ks(3) and a_offset(43).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
13021
// Single run at m=4, n=8, k=2 with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
13035
// Single run at m=4, n=8, k=2 with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
13049
// Single run at m=4, n=8, k=2 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
13063 #endif // XNN_ARCH_ARM64
13064
13065
13066 #if XNN_ARCH_ARM64
// Single run of the lane_ld128 kernel at m=4, n=8, k=4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
          xnn_init_f32_minmax_scalar_params);
}
13079
// Single run at m=4, n=8, k=4 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
          xnn_init_f32_minmax_scalar_params);
}
13093
// With k=4: runs every (n, m) with n in 1..8 and m in 1..4, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
13111
// With n=8, k=4: runs m = 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
13127
// With m=4, k=4: runs n = 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
13143
// Runs the kernel at m=4, n=8 for k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
13158
// For k = 1, 2, 3, n = 1..8 and m = 1..4: one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13178
// Runs the kernel at m=4, n=8 for k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
13193
// For k = 5, 6, 7, n = 1..8 and m = 1..4: one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13213
// Runs the kernel at m=4, n=8 for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
13228
// For k = 8, 12, ..., 40, n = 1..8 and m = 1..4: one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13248
// For n = 9..15 and k = 1, 6, 11, 16: runs the kernel with m=4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
13265
// For n = 9..15 and k = 1, 6, 11, 16: runs the kernel with m=4 and
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
13283
// For n = 9..15, k = 1, 6, 11, 16 and m = 1..4: one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13303
// For n = 16, 24 and k = 1, 6, 11, 16: runs the kernel with m=4.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
13320
// For n = 16, 24 and k = 1, 6, 11, 16: runs the kernel with m=4 and
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
13338
// For n = 16, 24, k = 1, 6, 11, 16 and m = 1..4: one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13358
// For k = 1, 6, 11, 16: runs the kernel at m=4, n=8 with ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
13374
// For k = 1, 6, 11, 16, n = 1..8 and m = 1..4: runs with ks(3), one
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13395
// For n = 9..15 and k = 1, 6, 11, 16: runs the kernel with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
13413
// For n = 16, 24 and k = 1, 6, 11, 16: runs the kernel with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
13431
// For k = 1, 6, 11, 16, n = 1..8 and m = 1..4: runs with cm_stride(11), one
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13452
// For k = 1, 6, 11, 16: runs the kernel at m=4, n=8 with ks(3) and
// a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
13469
// m = 4, n = 8, k in {1, 6, 11, 16}, ks = 3, a_offset = 83; zero_index is
// swept over 0..3 for each k.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t mz_idx = 0; mz_idx < 4; ++mz_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_dim)
          .ks(3).a_offset(83).zero_index(mz_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13489
// m = 4, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
13503
// m = 4, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
13517
// m = 4, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
13531 #endif // XNN_ARCH_ARM64
13532
13533
13534 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// m = 4, n = 8, k = 4 with all other tester settings left at their defaults.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
}
13547
// m = 4, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
}
13561
// Every (m, n) with m in 1..4 and n in 1..8 at k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13579
// m swept over 1..4 with n = 8 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
13595
// n swept over 1..8 with m = 4 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
13611
// m = 4, n = 8 with k swept over 1..3.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
13626
// Every (m, n) with m in 1..4 and n in 1..8, for k in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13646
// m = 4, n = 8 with k swept over 5..7.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
13661
// Every (m, n) with m in 1..4 and n in 1..8, for k in 5..7, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13681
// m = 4, n = 8 with k swept over 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
13696
// Every (m, n) with m in 1..4 and n in 1..8, for k in 8, 12, ..., 40,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13716
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13733
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13751
// n in 9..15, k in {1, 6, 11, 16}, and m swept over 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13771
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13788
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13806
// n in {16, 24}, k in {1, 6, 11, 16}, and m swept over 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13826
// m = 4, n = 8, k in {1, 6, 11, 16}, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
13842
// Every (m, n) with m in 1..4 and n in 1..8, for k in {1, 6, 11, 16},
// with ks = 3 and one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3).iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13863
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13881
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13899
// Every (m, n) with m in 1..4 and n in 1..8, for k in {1, 6, 11, 16},
// using cm_stride = 11 and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11).iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13920
// m = 4, n = 8, k in {1, 6, 11, 16}, with ks = 3 and a_offset = 83.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .ks(3).a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
  }
}
13937
// m = 4, n = 8, k in {1, 6, 11, 16}, ks = 3, a_offset = 83; zero_index is
// swept over 0..3 for each k.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t mz_idx = 0; mz_idx < 4; ++mz_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(8).k(k_dim)
          .ks(3).a_offset(83).zero_index(mz_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
13957
// m = 4, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
}
13971
// m = 4, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
}
13985
// m = 4, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8S4__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
}
13999 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14000
14001
14002 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// m = 4, n = 8, k = 4 with all other tester settings left at their defaults.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
14015
// m = 4, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
14029
// Every (m, n) with m in 1..4 and n in 1..8 at k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14047
// m swept over 1..4 with n = 8 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
14063
// n swept over 1..8 with m = 4 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
14079
// m = 4, n = 8 with k swept over 1..3.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
14094
// Every (m, n) with m in 1..4 and n in 1..8, for k in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14114
// m = 4, n = 8 with k swept over 5..7.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
14129
// Every (m, n) with m in 1..4 and n in 1..8, for k in 5..7, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14149
// m = 4, n = 8 with k swept over 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
14164
// Every (m, n) with m in 1..4 and n in 1..8, for k in 8, 12, ..., 40,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14184
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14201
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14219
// n in 9..15, k in {1, 6, 11, 16}, and m swept over 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14239
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14256
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14274
// n in {16, 24}, k in {1, 6, 11, 16}, and m swept over 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14294
// m = 4, n = 8, k in {1, 6, 11, 16}, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
14310
// Every (m, n) with m in 1..4 and n in 1..8, for k in {1, 6, 11, 16},
// with ks = 3 and one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3).iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14331
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14349
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14367
// Every (m, n) with m in 1..4 and n in 1..8, for k in {1, 6, 11, 16},
// using cm_stride = 11 and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11).iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14388
// m = 4, n = 8, k in {1, 6, 11, 16}, with ks = 3 and a_offset = 83.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .ks(3).a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
14405
// m = 4, n = 8, k in {1, 6, 11, 16}, ks = 3, a_offset = 83; zero_index is
// swept over 0..3 for each k.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t mz_idx = 0; mz_idx < 4; ++mz_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(8).k(k_dim)
          .ks(3).a_offset(83).zero_index(mz_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14425
// m = 4, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
14439
// m = 4, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
14453
// m = 4, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
14467 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14468
14469
14470 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// m = 6, n = 2, k = 2 with all other tester settings left at their defaults.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(2)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
14483
// m = 6, n = 2, k = 2 with cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(2)
      .cn_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
14497
// Every (m, n) with m in 1..6 and n in 1..2 at k = 2, one iteration each.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(2)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
14515
// m swept over 1..6 with n = 2 and k = 2, one iteration each.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(m_dim).n(2).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
14531
// n swept over 1..2 with m = 6 and k = 2, one iteration each.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
14547
// m = 6, n = 2; the loop covers k in [1, 2), i.e. only k = 1.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(2).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
14562
// Every (m, n) with m in 1..6 and n in 1..2, for k in [1, 2) (only k = 1),
// one iteration each.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14582
// m = 6, n = 2; the loop covers k in [3, 4), i.e. only k = 3.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(2).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
14597
// Every (m, n) with m in 1..6 and n in 1..2, for k in [3, 4) (only k = 3),
// one iteration each.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14617
// m = 6, n = 2 with k swept over 4, 6, ..., 20.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(2).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
14632
// Every (m, n) with m in 1..6 and n in 1..2, for k in 4, 6, ..., 20,
// one iteration each.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14652
// m = 6 with n swept over [3, 4) and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, n_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14669
// m = 6 with n swept over [3, 4), k = 1, 4, 7, 10, and cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14687
// Sweeps n in [3, 4), k = 1, 4, 7, 10 and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14707
// m = 6 with n = 4, 6 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, n_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14724
// m = 6 with n = 4, 6, k = 1, 4, 7, 10, and cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14742
// Sweeps n = 4, 6, k = 1, 4, 7, 10 and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, n_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14762
// Full 6x2 tile with ks set to 3 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(k_dim)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
14778
// Sweeps k = 1, 4, 7, 10, n in [1, 2] and m in [1, 6] with ks set to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14799
// m = 6 with n swept over [3, 4), k = 1, 4, 7, 10, and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14817
// m = 6 with n = 4, 6, k = 1, 4, 7, 10, and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14835
// Sweeps k = 1, 4, 7, 10, n in [1, 2] and m in [1, 6] with cm_stride set to 5 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14856
// Full 6x2 tile with ks set to 3, a_offset set to 67, and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(k_dim)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
14873
// Full 6x2 tile with ks = 3 and a_offset = 67; zero_index takes each value in [0, 6) for k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(2).k(k_dim)
        .ks(3)
        .a_offset(67)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14893
// Single full-tile run (m = 6, n = 2, k = 2) with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(2).kr(1).sr(1)
    .m(6).n(2).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
14907
// Single full-tile run (m = 6, n = 2, k = 2) with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(2).kr(1).sr(1)
    .m(6).n(2).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
14921
// Single full-tile run (m = 6, n = 2, k = 2) with cm_stride set to 5.
TEST(F32_IGEMM_MINMAX_6X2__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(2).kr(1).sr(1)
    .m(6).n(2).k(2)
    .cm_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
14935 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14936
14937
14938 #if XNN_ARCH_ARM64
// Single full-tile run: m = 6, n = 2, k = 2.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(2).kr(1).sr(1)
    .m(6).n(2).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
14951
// Single full-tile run (m = 6, n = 2, k = 2) with cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(2).kr(1).sr(1)
    .m(6).n(2).k(2)
    .cn_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
14965
// k fixed at 2; sweeps n in [1, 2] and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14983
// n = 2 and k = 2 fixed; sweeps m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(m_dim).n(2).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
14999
// m = 6 and k = 2 fixed; sweeps n in [1, 2], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(n_dim).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
15015
// Full 6x2 tile with k swept over [1, 2).
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
15030
// Sweeps k in [1, 2), n in [1, 2] and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15050
// Full 6x2 tile with k swept over [3, 4).
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
15065
// Sweeps k in [3, 4), n in [1, 2] and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15085
// Full 6x2 tile with k = 4, 6, ..., 20.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
15100
// Sweeps k = 4, 6, ..., 20 against n in [1, 2] and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15120
// m = 6 with n swept over [3, 4) and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15137
// m = 6 with n swept over [3, 4), k = 1, 4, 7, 10, and cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15155
// Sweeps n in [3, 4), k = 1, 4, 7, 10 and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15175
// m = 6 with n = 4, 6 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15192
// m = 6 with n = 4, 6, k = 1, 4, 7, 10, and cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15210
// Sweeps n = 4, 6, k = 1, 4, 7, 10 and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, n_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15230
// Full 6x2 tile with ks set to 3 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(k_dim)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
15246
// Sweeps k = 1, 4, 7, 10, n in [1, 2] and m in [1, 6] with ks set to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15267
// m = 6 with n swept over [3, 4), k = 1, 4, 7, 10, and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15285
// m = 6 with n = 4, 6, k = 1, 4, 7, 10, and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15303
// Sweeps k = 1, 4, 7, 10, n in [1, 2] and m in [1, 6] with cm_stride set to 5 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15324
// Full 6x2 tile with ks set to 3, a_offset set to 67, and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    GemmMicrokernelTester()
      .mr(6).nr(2).kr(1).sr(1)
      .m(6).n(2).k(k_dim)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
15341
// Full 6x2 tile with ks = 3 and a_offset = 67; zero_index takes each value in [0, 6) for k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(2).kr(1).sr(1)
        .m(6).n(2).k(k_dim)
        .ks(3)
        .a_offset(67)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15361
// Single full-tile run (m = 6, n = 2, k = 2) with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(2).kr(1).sr(1)
    .m(6).n(2).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
15375
// Single full-tile run (m = 6, n = 2, k = 2) with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(2).kr(1).sr(1)
    .m(6).n(2).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
15389
// Single full-tile run (m = 6, n = 2, k = 2) with cm_stride set to 5.
TEST(F32_IGEMM_MINMAX_6X2__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(2).kr(1).sr(1)
    .m(6).n(2).k(2)
    .cm_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
15403 #endif // XNN_ARCH_ARM64
15404
15405
15406 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full-tile run: m = 6, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
15419
// Single full-tile run (m = 6, n = 8, k = 4) with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
15433
// k fixed at 4; sweeps n in [1, 8] and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15451
// n = 8 and k = 4 fixed; sweeps m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15467
// m = 6 and k = 4 fixed; sweeps n in [1, 8], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15483
// Full 6x8 tile with k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15498
// Sweeps k = 1, 2, 3, n in [1, 8] and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15518
// Full 6x8 tile with k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15533
// Sweeps k = 5, 6, 7, n in [1, 8] and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15553
// Full 6x8 tile with k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15568
// Sweeps k = 8, 12, ..., 40 against n in [1, 8] and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15588
// m = 6 with n in [9, 16) and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15605
// m = 6 with n in [9, 16), k = 1, 6, 11, 16, and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15623
// Sweeps n in [9, 16), k = 1, 6, 11, 16 and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15643
// m = 6 with n = 16, 24 and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15660
// m = 6 with n = 16, 24, k = 1, 6, 11, 16, and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15678
// Sweeps n = 16, 24, k = 1, 6, 11, 16 and m in [1, 6], with iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15698
// Full 6x8 tile with ks set to 3 and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15714
// Sweeps k = 1, 6, 11, 16, n in [1, 8] and m in [1, 6] with ks set to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15735
// m = 6 with n in [9, 16), k = 1, 6, 11, 16, and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15753
// m = 6 with n = 16, 24, k = 1, 6, 11, 16, and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15771
// For k in {1, 6, 11, 16}, runs every (m, n) pair with m in 1..6 and n in 1..8,
// using cm_stride(11) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15792
// Sweeps k over 1, 6, 11, 16 on the m=6, n=8 tile with ks(3) and a_offset(127).
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15809
// For k in {1, 6, 11, 16}, runs the m=6, n=8 tile with ks(3) and a_offset(127)
// once per zero_index value in 0..5.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15829
// Single run with m=6, n=8, k=4 and qmin(128).
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
15843
// Single run with m=6, n=8, k=4 and qmax(128).
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
15857
// Single run with m=6, n=8, k=4 and cm_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
15871 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15872
15873
15874 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=6, n=8, k=4.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
15887
// Single run with m=6, n=8, k=4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
15901
// At k=4, runs every (m, n) pair with m in 1..6 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15919
// At k=4 and n=8, runs m in 1..6, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15935
// At k=4 and m=6, runs n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15951
// Runs the m=6, n=8 tile for k in 1..3.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
15966
// For k in 1..3, runs every (m, n) pair with m in 1..6 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15986
// Runs the m=6, n=8 tile for k in 5..7.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
16001
// For k in 5..7, runs every (m, n) pair with m in 1..6 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16021
// Runs the m=6, n=8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
16036
// For k = 8, 12, ..., 40, runs every (m, n) pair with m in 1..6 and n in 1..8,
// one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16056
// For n in 9..15 and k in {1, 6, 11, 16}, runs m=6.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16073
// For n in 9..15 and k in {1, 6, 11, 16}, runs m=6 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16091
// For n in 9..15, k in {1, 6, 11, 16} and m in 1..6, runs one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16111
// For n in {16, 24} and k in {1, 6, 11, 16}, runs m=6.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16128
// For n in {16, 24} and k in {1, 6, 11, 16}, runs m=6 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16146
// For n in {16, 24}, k in {1, 6, 11, 16} and m in 1..6, runs one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16166
// Sweeps k over 1, 6, 11, 16 on the m=6, n=8 tile with ks(3).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
16182
// For k in {1, 6, 11, 16}, runs every (m, n) pair with m in 1..6 and n in 1..8,
// using ks(3) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16203
// For n in 9..15 and k in {1, 6, 11, 16}, runs m=6 with ks(3).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16221
// For n in {16, 24} and k in {1, 6, 11, 16}, runs m=6 with ks(3).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16239
// For k in {1, 6, 11, 16}, runs every (m, n) pair with m in 1..6 and n in 1..8,
// using cm_stride(11) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16260
// Sweeps k over 1, 6, 11, 16 on the m=6, n=8 tile with ks(3) and a_offset(127).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
16277
// For k in {1, 6, 11, 16}, runs the m=6, n=8 tile with ks(3) and a_offset(127)
// once per zero_index value in 0..5.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16297
// Single run with m=6, n=8, k=4 and qmin(128).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
16311
// Single run with m=6, n=8, k=4 and qmax(128).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
16325
// Single run with m=6, n=8, k=4 and cm_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
16339 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16340
16341
16342 #if XNN_ARCH_ARM64
// Single run with m=6, n=8, k=2.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
16355
// Single run with m=6, n=8, k=2 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
16369
// At k=2, runs every (m, n) pair with m in 1..6 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(2)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16387
// At k=2 and n=8, runs m in 1..6, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
16403
// At k=2 and m=6, runs n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
16419
// Runs the m=6, n=8 tile for every k below 2 (the loop yields only k=1).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
16434
// For every k below 2 (only k=1), runs every (m, n) pair with m in 1..6 and
// n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16454
// Runs the m=6, n=8 tile for k from 3 up to (but excluding) 4, i.e. only k=3.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
16469
// For k from 3 up to (but excluding) 4, i.e. only k=3, runs every (m, n) pair
// with m in 1..6 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16489
// Runs the m=6, n=8 tile for k = 4, 6, ..., 20.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
16504
// For k = 4, 6, ..., 20, runs every (m, n) pair with m in 1..6 and n in 1..8,
// one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16524
// For n in 9..15 and k in {1, 4, 7, 10}, runs m=6.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16541
// For n in 9..15 and k in {1, 4, 7, 10}, runs m=6 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16559
// For n in 9..15, k in {1, 4, 7, 10} and m in 1..6, runs one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16579
// For n in {16, 24} and k in {1, 4, 7, 10}, runs m=6.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16596
// For n in {16, 24} and k in {1, 4, 7, 10}, runs m=6 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16614
// For n in {16, 24}, k in {1, 4, 7, 10} and m in 1..6, runs one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16634
// Sweeps k over 1, 4, 7, 10 on the m=6, n=8 tile with ks(3).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
16650
// For k in {1, 4, 7, 10}, runs every (m, n) pair with m in 1..6 and n in 1..8,
// using ks(3) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16671
// For n in 9..15 and k in {1, 4, 7, 10}, runs m=6 with ks(3).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16689
// For n in {16, 24} and k in {1, 4, 7, 10}, runs m=6 with ks(3).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16707
// For k in {1, 4, 7, 10}, runs every (m, n) pair with m in 1..6 and n in 1..8,
// using cm_stride(11) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
16728
// Sweeps k over 1, 4, 7, 10 on the m=6, n=8 tile with ks(3) and a_offset(67).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(67)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
16745
// For k in {1, 4, 7, 10}, runs the m=6, n=8 tile with ks(3) and a_offset(67)
// once per zero_index value in 0..5.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(67)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
16765
// Single run with m=6, n=8, k=2 and qmin(128).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
16779
// Single run with m=6, n=8, k=2 and qmax(128).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
16793
// Single run with m=6, n=8, k=2 and cm_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
16807 #endif // XNN_ARCH_ARM64
16808
16809
16810 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=1, n=8, k=4.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
16823
// Single run with m=1, n=8, k=4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
16837
// At k=4, runs n in 1..8 against the m loop (which yields only m=1),
// one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
16855
// At k=4 and n=8, runs the m loop (which yields only m=1), one iteration.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
16871
// At k=4 and m=1, runs n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
16887
// Runs the m=1, n=8 tile for k in 1..3.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
16902
// 1x8 SSE dup kernel: k in [1, 4) crossed with n in [1, 8] and m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16922
// 1x8 SSE dup kernel: m=1, n=8 for every k in [5, 8).
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
            xnn_init_f32_minmax_sse_params);
  }
}
16937
// 1x8 SSE dup kernel: k in [5, 8) crossed with n in [1, 8] and m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16957
// 1x8 SSE dup kernel: m=1, n=8 for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
            xnn_init_f32_minmax_sse_params);
  }
}
16972
// 1x8 SSE dup kernel: k = 8, 12, ..., 40 crossed with n in [1, 8] and
// m in [1, 1], iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16992
// 1x8 SSE dup kernel: m=1, n in [9, 16), k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17009
// 1x8 SSE dup kernel: m=1, n in [9, 16), k = 1, 6, 11, 16, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17027
// 1x8 SSE dup kernel: n in [9, 16), k = 1, 6, 11, 16, m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17047
// 1x8 SSE dup kernel: m=1, n = 16, 24, k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17064
// 1x8 SSE dup kernel: m=1, n = 16, 24, k = 1, 6, 11, 16, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17082
// 1x8 SSE dup kernel: n = 16, 24, k = 1, 6, 11, 16, m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17102
// 1x8 SSE dup kernel: m=1, n=8, k = 1, 6, 11, 16 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
            xnn_init_f32_minmax_sse_params);
  }
}
17118
// 1x8 SSE dup kernel: k = 1, 6, 11, 16, n in [1, 8], m in [1, 1] with ks set
// to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17139
// 1x8 SSE dup kernel: m=1, n in [9, 16), k = 1, 6, 11, 16 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17157
// 1x8 SSE dup kernel: m=1, n = 16, 24, k = 1, 6, 11, 16 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17175
// 1x8 SSE dup kernel: k = 1, 6, 11, 16, n in [1, 8], m in [1, 1] with
// cm_stride set to 11 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17196
// 1x8 SSE dup kernel: m=1, n=8, k = 1, 6, 11, 16 with ks set to 3 and
// a_offset set to 23.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
            xnn_init_f32_minmax_sse_params);
  }
}
17213
// 1x8 SSE dup kernel: m=1, n=8, k = 1, 6, 11, 16 with ks set to 3, a_offset
// set to 23, and zero_index taking every value in [0, 1).
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17233
// 1x8 SSE dup kernel: m=1, n=8, k=4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
          xnn_init_f32_minmax_sse_params);
}
17247
// 1x8 SSE dup kernel: m=1, n=8, k=4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
          xnn_init_f32_minmax_sse_params);
}
17261
// 1x8 SSE dup kernel: m=1, n=8, k=4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
          xnn_init_f32_minmax_sse_params);
}
17275 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17276
17277
17278 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x8 SSE load1 kernel: m=1, n=8, k=1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
          xnn_init_f32_minmax_sse_params);
}
17291
// 1x8 SSE load1 kernel: m=1, n=8, k=1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
          xnn_init_f32_minmax_sse_params);
}
17305
// 1x8 SSE load1 kernel: every n in [1, 8] and m in [1, 1] at k=1,
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17323
// 1x8 SSE load1 kernel: every m in [1, 1] with n=8, k=1, iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
            xnn_init_f32_minmax_sse_params);
  }
}
17339
// 1x8 SSE load1 kernel: every n in [1, 8] with m=1, k=1, iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
            xnn_init_f32_minmax_sse_params);
  }
}
17355
// 1x8 SSE load1 kernel: m=1, n=8 for every k in [2, 10).
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
            xnn_init_f32_minmax_sse_params);
  }
}
17370
// 1x8 SSE load1 kernel: k in [2, 10) crossed with n in [1, 8] and m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_gt_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17390
// 1x8 SSE load1 kernel: m=1, n in [9, 16), k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17407
// 1x8 SSE load1 kernel: m=1, n in [9, 16), k = 1, 3, 5, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17425
// 1x8 SSE load1 kernel: n in [9, 16), k = 1, 3, 5, m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17445
// 1x8 SSE load1 kernel: m=1, n = 16, 24, k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17462
// 1x8 SSE load1 kernel: m=1, n = 16, 24, k = 1, 3, 5, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17480
// 1x8 SSE load1 kernel: n = 16, 24, k = 1, 3, 5, m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17500
// 1x8 SSE load1 kernel: m=1, n=8, k = 1, 3, 5 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
            xnn_init_f32_minmax_sse_params);
  }
}
17516
// 1x8 SSE load1 kernel: k = 1, 3, 5, n in [1, 8], m in [1, 1] with ks set
// to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17537
// 1x8 SSE load1 kernel: m=1, n in [9, 16), k = 1, 3, 5 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17555
// 1x8 SSE load1 kernel: m=1, n = 16, 24, k = 1, 3, 5 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17573
// 1x8 SSE load1 kernel: k = 1, 3, 5, n in [1, 8], m in [1, 1] with cm_stride
// set to 11 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17594
// 1x8 SSE load1 kernel: m=1, n=8, k = 1, 3, 5 with ks set to 3 and a_offset
// set to 7.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(7)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
            xnn_init_f32_minmax_sse_params);
  }
}
17611
// 1x8 SSE load1 kernel: m=1, n=8, k = 1, 3, 5 with ks set to 3, a_offset set
// to 7, and zero_index taking every value in [0, 1).
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(7)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17631
// 1x8 SSE load1 kernel: m=1, n=8, k=1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
          xnn_init_f32_minmax_sse_params);
}
17645
// 1x8 SSE load1 kernel: m=1, n=8, k=1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
          xnn_init_f32_minmax_sse_params);
}
17659
// 1x8 SSE load1 kernel: m=1, n=8, k=1 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
          xnn_init_f32_minmax_sse_params);
}
17673 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17674
17675
17676 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x8s4 SSE kernel (sr=4): m=1, n=8, k=4.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
          xnn_init_f32_minmax_sse_params);
}
17689
// 1x8s4 SSE kernel (sr=4): m=1, n=8, k=4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
          xnn_init_f32_minmax_sse_params);
}
17703
// 1x8s4 SSE kernel (sr=4): every n in [1, 8] and m in [1, 1] at k=4,
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17721
// 1x8s4 SSE kernel (sr=4): every m in [1, 1] with n=8, k=4, iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
            xnn_init_f32_minmax_sse_params);
  }
}
17737
// 1x8s4 SSE kernel (sr=4): every n in [1, 8] with m=1, k=4, iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
            xnn_init_f32_minmax_sse_params);
  }
}
17753
// 1x8s4 SSE kernel (sr=4): m=1, n=8 for every k in [1, 4).
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
            xnn_init_f32_minmax_sse_params);
  }
}
17768
// 1x8s4 SSE kernel (sr=4): k in [1, 4) crossed with n in [1, 8] and
// m in [1, 1], iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17788
// 1x8s4 SSE kernel (sr=4): m=1, n=8 for every k in [5, 8).
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
            xnn_init_f32_minmax_sse_params);
  }
}
17803
// 1x8s4 SSE kernel (sr=4): k in [5, 8) crossed with n in [1, 8] and
// m in [1, 1], iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17823
// 1x8s4 SSE kernel (sr=4): m=1, n=8 for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
            xnn_init_f32_minmax_sse_params);
  }
}
17838
// 1x8s4 SSE kernel (sr=4): k = 8, 12, ..., 40 crossed with n in [1, 8] and
// m in [1, 1], iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17858
// 1x8s4 SSE kernel (sr=4): m=1, n in [9, 16), k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17875
// 1x8s4 SSE kernel (sr=4): m=1, n in [9, 16), k = 1, 6, 11, 16,
// cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17893
// 1x8s4 SSE kernel (sr=4): n in [9, 16), k = 1, 6, 11, 16, m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17913
// 1x8s4 SSE kernel (sr=4): m=1, n = 16, 24, k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17930
// 1x8s4 SSE kernel (sr=4): m=1, n = 16, 24, k = 1, 6, 11, 16,
// cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
17948
// 1x8s4 SSE kernel (sr=4): n = 16, 24, k = 1, 6, 11, 16, m in [1, 1],
// iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17968
// 1x8s4 SSE kernel (sr=4): m=1, n=8, k = 1, 6, 11, 16 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
            xnn_init_f32_minmax_sse_params);
  }
}
17984
// 1x8s4 SSE kernel (sr=4): k = 1, 6, 11, 16, n in [1, 8], m in [1, 1] with
// ks set to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18005
// 1x8s4 SSE kernel (sr=4): m=1, n in [9, 16), k = 1, 6, 11, 16 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
18023
// 1x8s4 SSE kernel (sr=4): m=1, n = 16, 24, k = 1, 6, 11, 16 with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
18041
// 1x8s4 SSE kernel (sr=4): k = 1, 6, 11, 16, n in [1, 8], m in [1, 1] with
// cm_stride set to 11 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
                xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18062
// 1x8s4 SSE kernel (sr=4): m=1, n=8, k = 1, 6, 11, 16 with ks set to 3 and
// a_offset set to 23.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
            xnn_init_f32_minmax_sse_params);
  }
}
18079
// 1x8s4 SSE kernel (sr=4): m=1, n=8, k = 1, 6, 11, 16 with ks set to 3,
// a_offset set to 23, and zero_index taking every value in [0, 1).
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
18099
// 1x8s4 SSE kernel (sr=4): m=1, n=8, k=4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
          xnn_init_f32_minmax_sse_params);
}
18113
// Single case (m = 1, n = 8, k = 4) with qmax(128) set on the tester.
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
}
18127
// Single case (m = 1, n = 8, k = 4) with cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
}
18141 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18142
18143
18144 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case with m = 3, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18157
// Single case (m = 3, n = 8, k = 4) with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18171
// k = 4; every n in 1..8 crossed with every m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18189
// k = 4, n = 8; every m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18205
// k = 4, m = 3; every n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18221
// m = 3, n = 8; every k in 1..3.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18236
// Every k in 1..3, n in 1..8 and m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18256
// m = 3, n = 8; every k in 5..7.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18271
// Every k in 5..7, n in 1..8 and m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18291
// m = 3, n = 8; k steps through 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18306
// k steps through 8, 12, ..., 40; every n in 1..8 and m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18326
// m = 3; every n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18343
// m = 3, cn_stride(11); every n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18361
// Every n in 9..15, k in {1, 6, 11, 16} and m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18381
// m = 3; n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18398
// m = 3, cn_stride(11); n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18416
// n in {16, 24}, k in {1, 6, 11, 16} and every m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18436
// m = 3, n = 8, ks(3); k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18452
// ks(3); k in {1, 6, 11, 16}, every n in 1..8 and m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18473
// m = 3, ks(3); every n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18491
// m = 3, ks(3); n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18509
// cm_stride(11); k in {1, 6, 11, 16}, every n in 1..8 and m in 1..3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18530
// m = 3, n = 8, ks(3), a_offset(67); k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_dim)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18547
// m = 3, n = 8, ks(3), a_offset(67); k in {1, 6, 11, 16} crossed with zero_index 0..2.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_dim)
        .ks(3)
        .a_offset(67)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18567
// Single case (m = 3, n = 8, k = 4) with qmin(128) set on the tester.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18581
// Single case (m = 3, n = 8, k = 4) with qmax(128) set on the tester.
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18595
// Single case (m = 3, n = 8, k = 4) with cm_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18609 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18610
18611
18612 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case with m = 4, n = 2, k = 4.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
}
18625
// Single case (m = 4, n = 2, k = 4) with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
}
18639
// k = 4; every n in 1..2 crossed with every m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
18657
// k = 4, n = 2; every m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(m_dim).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
  }
}
18673
// k = 4, m = 4; every n in 1..2, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
  }
}
18689
// m = 4, n = 2; every k in 1..3.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
  }
}
18704
// Every k in 1..3, n in 1..2 and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18724
// m = 4, n = 2; every k in 5..7.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
  }
}
18739
// Every k in 5..7, n in 1..2 and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18759
// m = 4, n = 2; k steps through 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
  }
}
18774
// k steps through 8, 12, ..., 40; every n in 1..2 and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18794
// m = 4; n = 3 (the only value in [3, 4)) crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_gt_2) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
18811
// m = 4, cn_stride(5); n = 3 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_gt_2_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
18829
// n = 3, k in {1, 6, 11, 16} and every m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_gt_2_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18849
// m = 4; n in {4, 6} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_div_2) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
18866
// m = 4, cn_stride(5); n in {4, 6} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_div_2_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
18884
// n in {4, 6}, k in {1, 6, 11, 16} and every m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_div_2_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18904
// m = 4, n = 2, ks(3); k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
  }
}
18920
// ks(3); k in {1, 6, 11, 16}, every n in 1..2 and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18941
// m = 4, ks(3); n = 3 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_gt_2_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
18959
// m = 4, ks(3); n in {4, 6} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_div_2_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
18977
// cm_stride(5); k in {1, 6, 11, 16}, every n in 1..2 and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18998
// m = 4, n = 2, ks(3), a_offset(83); k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
  }
}
19015
// m = 4, n = 2, ks(3), a_offset(83); k in {1, 6, 11, 16} crossed with zero_index 0..3.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
19035
// Single case (m = 4, n = 2, k = 4) with qmin(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
}
19049
// Single case (m = 4, n = 2, k = 4) with qmax(128) set on the tester.
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
}
19063
// Single case (m = 4, n = 2, k = 4) with cm_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cm_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
}
19077 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19078
19079
19080 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case with m = 4, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
19093
// Single case (m = 4, n = 8, k = 4) with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
19107
// k = 4; every n in 1..8 crossed with every m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19125
// k = 4, n = 8; every m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
19141
// k = 4, m = 4; every n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
19157
// m = 4, n = 8; every k in 1..3.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
19172
// Every k in 1..3, n in 1..8 and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19192
// m = 4, n = 8; every k in 5..7.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
19207
// Every k in 5..7, n in 1..8 and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19227
// m = 4, n = 8; k steps through 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
19242
// k steps through 8, 12, ..., 40; every n in 1..8 and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19262
// m = 4; every n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19279
// m = 4, cn_stride(11); every n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19297
// Every n in 9..15, k in {1, 6, 11, 16} and m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19317
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m fixed at 4.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19334
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m fixed at 4 and
// cn_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19352
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} and m in [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19372
// Sweeps k over {1, 6, 11, 16} with m=4, n=8 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
19388
// Sweeps k over {1, 6, 11, 16}, n over [1, 8] and m over [1, 4] with ks(3);
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19409
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19427
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19445
// Sweeps k over {1, 6, 11, 16}, n over [1, 8] and m over [1, 4] with
// cm_stride(11); each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19466
// Sweeps k over {1, 6, 11, 16} with m=4, n=8, ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
19483
// Sweeps k over {1, 6, 11, 16} and zero_index over [0, 3] with m=4, n=8,
// ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19503
// Single run with m=4, n=8, k=4 and qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
19517
// Single run with m=4, n=8, k=4 and qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
19531
// Single run with m=4, n=8, k=4 and cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
19545 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19546
19547
19548 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=4, n=8, k=4 and no further overrides.
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
19561
// Single run with m=4, n=8, k=4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
19575
// Runs every (n, m) pair with n in [1, 8] and m in [1, 4] at k=4;
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19593
// Sweeps m over [1, 4] with n=8 and k=4; each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
19609
// Sweeps n over [1, 8] with m=4 and k=4; each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
19625
// Sweeps k over [1, 3] with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
19640
// Sweeps k over [1, 3], n over [1, 8] and m over [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19660
// Sweeps k over [5, 7] with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
19675
// Sweeps k over [5, 7], n over [1, 8] and m over [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19695
// Sweeps k over 8, 12, ..., 40 with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
19710
// Sweeps k over 8, 12, ..., 40 and, for each k, every (n, m) pair with
// n in [1, 8] and m in [1, 4]; each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19730
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} with m fixed at 4.
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19747
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} with m fixed at 4 and
// cn_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19765
// Sweeps n over [9, 15], k over {1, 6, 11, 16} and m over [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19785
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m fixed at 4.
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19802
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m fixed at 4 and
// cn_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19820
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} and m in [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19840
// Sweeps k over {1, 6, 11, 16} with m=4, n=8 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
19856
// Sweeps k over {1, 6, 11, 16}, n over [1, 8] and m over [1, 4] with ks(3);
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19877
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19895
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19913
// Sweeps k over {1, 6, 11, 16}, n over [1, 8] and m over [1, 4] with
// cm_stride(11); each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
19934
// Sweeps k over {1, 6, 11, 16} with m=4, n=8, ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
19951
// Sweeps k over {1, 6, 11, 16} and zero_index over [0, 3] with m=4, n=8,
// ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
19971
// Single run with m=4, n=8, k=4 and qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
19985
// Single run with m=4, n=8, k=4 and qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
19999
// Single run with m=4, n=8, k=4 and cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
20013 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20014
20015
20016 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=4, n=8, k=4 and no further overrides.
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20029
// Single run with m=4, n=8, k=4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20043
// Runs every (n, m) pair with n in [1, 8] and m in [1, 4] at k=4;
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20061
// Sweeps m over [1, 4] with n=8 and k=4; each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20077
// Sweeps n over [1, 8] with m=4 and k=4; each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20093
// Sweeps k over [1, 3] with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20108
// Sweeps k over [1, 3], n over [1, 8] and m over [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20128
// Sweeps k over [5, 7] with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20143
// Sweeps k over [5, 7], n over [1, 8] and m over [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20163
// Sweeps k over 8, 12, ..., 40 with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20178
// Sweeps k over 8, 12, ..., 40 and, for each k, every (n, m) pair with
// n in [1, 8] and m in [1, 4]; each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20198
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} with m fixed at 4.
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20215
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} with m fixed at 4 and
// cn_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20233
// Sweeps n over [9, 15], k over {1, 6, 11, 16} and m over [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20253
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m fixed at 4.
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20270
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m fixed at 4 and
// cn_stride(11) set on the tester.
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20288
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} and m in [1, 4];
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20308
// Sweeps k over {1, 6, 11, 16} with m=4, n=8 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20324
// Sweeps k over {1, 6, 11, 16}, n over [1, 8] and m over [1, 4] with ks(3);
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20345
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20363
// Runs n = 16 and n = 24 against k in {1, 6, 11, 16} with m=4 and ks(3).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20381
// Sweeps k over {1, 6, 11, 16}, n over [1, 8] and m over [1, 4] with
// cm_stride(11); each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20402
// Sweeps k over {1, 6, 11, 16} with m=4, n=8, ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20419
// Sweeps k over {1, 6, 11, 16} and zero_index over [0, 3] with m=4, n=8,
// ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20439
// Single run with m=4, n=8, k=4 and qmin(128).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20453
// Single run with m=4, n=8, k=4 and qmax(128).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20467
// Single run with m=4, n=8, k=4 and cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8S4__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20481 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20482
20483
20484 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=5, n=8, k=4 and no further overrides.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20497
// Single run with m=5, n=8, k=4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20511
// Runs every (n, m) pair with n in [1, 8] and m in [1, 5] at k=4;
// each run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20529
// Sweeps m over [1, 5] with n=8 (== nr) and k=4, iterations set to 1.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 1; m <= 5; ++m) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20545
// Sweeps n over [1, 8] with m=5 (== mr) and k=4, iterations set to 1.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20561
// Sweeps k over 1..3 (all values below 4) with m=5 (== mr), n=8 (== nr).
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20576
// Sweeps k over 1..3, n over [1, 8] and m over [1, 5] (nested in that order),
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20596
// Sweeps k over 5..7 with m=5 (== mr), n=8 (== nr).
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20611
// Sweeps k over 5..7, n over [1, 8] and m over [1, 5] (nested in that order),
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20631
// Sweeps k over multiples of 4 from 8 to 40 with m=5 (== mr), n=8 (== nr).
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20646
// Sweeps k over multiples of 4 from 8 to 40, n over [1, 8] and m over [1, 5]
// (nested in that order), with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20666
// Sweeps n over 9..15 (outer) and k over 1..20 in steps of 5 (inner), m=5 (== mr).
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20683
// Sweeps n over 9..15 (outer) and k over 1..20 in steps of 5 (inner), m=5 (== mr),
// with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20701
// Sweeps n over 9..15, k over 1..20 in steps of 5 and m over [1, 5]
// (nested in that order), with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20721
// Sweeps n over {16, 24} (outer) and k over 1..20 in steps of 5 (inner), m=5 (== mr).
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20738
// Sweeps n over {16, 24} (outer) and k over 1..20 in steps of 5 (inner), m=5 (== mr),
// with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20756
// Sweeps n over {16, 24}, k over 1..20 in steps of 5 and m over [1, 5]
// (nested in that order), with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20776
// Sweeps k over 1..20 in steps of 5 with m=5 (== mr), n=8 (== nr) and ks set to 3.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20792
// Sweeps k over 1..20 in steps of 5, n over [1, 8] and m over [1, 5]
// (nested in that order), with ks set to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20813
// Sweeps n over 9..15 (outer) and k over 1..20 in steps of 5 (inner), m=5 (== mr),
// with ks set to 3.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20831
// Sweeps n over {16, 24} (outer) and k over 1..20 in steps of 5 (inner), m=5 (== mr),
// with ks set to 3.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20849
// Sweeps k over 1..20 in steps of 5, n over [1, 8] and m over [1, 5]
// (nested in that order), with cm_stride set to 11 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
20870
// Sweeps k over 1..20 in steps of 5 with m=5 (== mr), n=8 (== nr), ks set to 3
// and a_offset set to 103.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
20887
// Sweeps k over 1..20 in steps of 5 (outer) and zero_index over 0..4 (inner),
// with m=5 (== mr), n=8 (== nr), ks set to 3 and a_offset set to 103.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 5; ++mz) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(8).k(k)
          .ks(3)
          .a_offset(103)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
20907
// Single configuration: m=5 (== mr), n=8 (== nr), k=4, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20921
// Single configuration: m=5 (== mr), n=8 (== nr), k=4, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20935
// Single configuration: m=5 (== mr), n=8 (== nr), k=4, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8S4__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
20949 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20950
20951
20952 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m=3 (== mr), n=16 (== nr), k=1.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
20965
// Single configuration: m=3 (== mr), n=16 (== nr), k=1, with cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
20979
// Sweeps n over [1, 16] (outer) and m over [1, 3] (inner) at k=1,
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m).n(n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20997
// Sweeps m over [1, 3] with n=16 (== nr) and k=1, iterations set to 1.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(m).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21013
// Sweeps n over [1, 16] with m=3 (== mr) and k=1, iterations set to 1.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21029
// Sweeps k over 2..9 with m=3 (== mr), n=16 (== nr).
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21044
// Sweeps k over 2..9, n over [1, 16] and m over [1, 3] (nested in that order),
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21064
// Sweeps n over 17..31 (outer) and k over {1, 3, 5} (inner), m=3 (== mr).
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21081
// Sweeps n over 17..31 (outer) and k over {1, 3, 5} (inner), m=3 (== mr),
// with cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21099
// Sweeps n over 17..31, k over {1, 3, 5} and m over [1, 3] (nested in that order),
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21119
// Sweeps n over {32, 48} (outer) and k over {1, 3, 5} (inner), m=3 (== mr).
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21136
// Sweeps n over {32, 48} (outer) and k over {1, 3, 5} (inner), m=3 (== mr),
// with cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21154
// Sweeps n over {32, 48}, k over {1, 3, 5} and m over [1, 3] (nested in that order),
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21174
// Sweeps k over {1, 3, 5} with m=3 (== mr), n=16 (== nr) and ks set to 3.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21190
// Sweeps k over {1, 3, 5}, n over [1, 16] and m over [1, 3] (nested in that order),
// with ks set to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21211
// Sweeps n over 17..31 (outer) and k over {1, 3, 5} (inner), m=3 (== mr),
// with ks set to 3.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21229
// Sweeps n over {32, 48} (outer) and k over {1, 3, 5} (inner), m=3 (== mr),
// with ks set to 3.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21247
// Sweeps k over {1, 3, 5}, n over [1, 16] and m over [1, 3] (nested in that order),
// with cm_stride set to 19 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21268
// Sweeps k over {1, 3, 5} with m=3 (== mr), n=16 (== nr), ks set to 3
// and a_offset set to 17.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k)
        .ks(3)
        .a_offset(17)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21285
// Sweeps k over {1, 3, 5} (outer) and zero_index over 0..2 (inner),
// with m=3 (== mr), n=16 (== nr), ks set to 3 and a_offset set to 17.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(16).k(k)
          .ks(3)
          .a_offset(17)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21305
// Single configuration: m=3 (== mr), n=16 (== nr), k=1, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21319
// Single configuration: m=3 (== mr), n=16 (== nr), k=1, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21333
// Single configuration: m=3 (== mr), n=16 (== nr), k=1, with cm_stride set to 19.
TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21347 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21348
21349
21350 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m=4 (== mr), n=8 (== nr), k=1.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21363
// Single configuration: m=4 (== mr), n=8 (== nr), k=1, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21377
// Sweeps n over [1, 8] (outer) and m over [1, 4] (inner) at k=1,
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21395
// Sweeps m over [1, 4] with n=8 (== nr) and k=1, iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21411
// Sweeps n over [1, 8] with m=4 (== mr) and k=1, iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21427
// Sweeps k over 2..9 with m=4 (== mr), n=8 (== nr).
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21442
// Sweeps k over 2..9, n over [1, 8] and m over [1, 4] (nested in that order),
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21462
// Sweeps n over 9..15 (outer) and k over {1, 3, 5} (inner), m=4 (== mr).
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21479
// Sweeps n over 9..15 (outer) and k over {1, 3, 5} (inner), m=4 (== mr),
// with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21497
// Sweeps n over 9..15, k over {1, 3, 5} and m over [1, 4] (nested in that order),
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21517
// Sweeps n over {16, 24} (outer) and k over {1, 3, 5} (inner), m=4 (== mr).
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21534
// Sweeps n over {16, 24} (outer) and k over {1, 3, 5} (inner), m=4 (== mr),
// with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21552
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over [1, 4] (nested in that order),
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21572
// Sweeps k over {1, 3, 5} with m=4 (== mr), n=8 (== nr) and ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21588
// Sweeps k over {1, 3, 5}, n over [1, 8] and m over [1, 4] (nested in that order),
// with ks set to 3 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21609
// Sweeps n over 9..15 (outer) and k over {1, 3, 5} (inner), m=4 (== mr),
// with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21627
// Sweeps n over {16, 24} (outer) and k over {1, 3, 5} (inner), m=4 (== mr),
// with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21645
// Sweeps k over {1, 3, 5}, n over [1, 8] and m over [1, 4] (nested in that order),
// with cm_stride set to 11 and iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21666
// Sweeps k over {1, 3, 5} with m=4 (== mr), n=8 (== nr), ks set to 3
// and a_offset set to 23.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21683
// Sweeps k over {1, 3, 5} (outer) and zero_index over 0..3 (inner),
// with m=4 (== mr), n=8 (== nr), ks set to 3 and a_offset set to 23.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k)
          .ks(3)
          .a_offset(23)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21703
// Single configuration: m=4 (== mr), n=8 (== nr), k=1, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21717
// Single m = 4, n = 8, k = 1 run with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast,
          xnn_init_f32_minmax_avx_params);
}
21731
// Single m = 4, n = 8, k = 1 run with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast,
          xnn_init_f32_minmax_avx_params);
}
21745 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21746
21747
21748 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m = 7, n = 8, k = 1 run with no extra tester options.
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
          xnn_init_f32_minmax_avx_params);
}
21761
// Single m = 7, n = 8, k = 1 run with cn_stride(11).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
          xnn_init_f32_minmax_avx_params);
}
21775
// At k = 1, sweeps n over 1..8 and m over 1..7 with iterations(1).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
21793
// At k = 1 and n = 8, sweeps m over 1..7 with iterations(1).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
21809
// At k = 1 and m = 7, sweeps n over 1..8 with iterations(1).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
21825
// Runs m = 7, n = 8 for every k in 2..9.
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(8).k(k_size)
      .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
21840
// Sweeps k over 2..9, n over 1..8 and m over 1..7 with iterations(1).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21860
// With m = 7, sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
21877
// With m = 7 and cn_stride(11), sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
21895
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..7 with iterations(1).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21915
// With m = 7, runs n in {16, 24} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
21932
// With m = 7 and cn_stride(11), runs n in {16, 24} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
21950
// Runs n in {16, 24}, k in {1, 3, 5} and m over 1..7 with iterations(1).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21970
// Runs m = 7, n = 8 for k in {1, 3, 5} with ks(3).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
21986
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..7 with ks(3) and iterations(1).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22007
// With m = 7 and ks(3), sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22025
// With m = 7 and ks(3), runs n in {16, 24} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22043
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..7 with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22064
// Runs m = 7, n = 8 for k in {1, 3, 5} with ks(3) and a_offset(37).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(8).k(k_size)
      .ks(3)
      .a_offset(37)
      .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22081
// For k in {1, 3, 5}, tries every zero_index in 0..6 with ks(3) and a_offset(37).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 7; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(8).k(k_size)
        .ks(3)
        .a_offset(37)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22101
// Single m = 7, n = 8, k = 1 run with qmin(128).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22115
// Single m = 7, n = 8, k = 1 run with qmax(128).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22129
// Single m = 7, n = 8, k = 1 run with cm_stride(11).
TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22143 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22144
22145
22146 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m = 1, n = 8, k = 1 run with no extra tester options.
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22159
// Single m = 1, n = 8, k = 1 run with cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22173
// At k = 1, sweeps n over 1..8 and m over 1..1 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22191
// At k = 1 and n = 8, sweeps m over 1..1 (a single pass) with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22207
// At k = 1 and m = 1, sweeps n over 1..8 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22223
// Runs m = 1, n = 8 for every k in 2..9.
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22238
// Sweeps k over 2..9, n over 1..8 and m over 1..1 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22258
// With m = 1, sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22275
// With m = 1 and cn_stride(11), sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22293
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..1 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22313
// With m = 1, runs n in {16, 24} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22330
// With m = 1 and cn_stride(11), runs n in {16, 24} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22348
// Runs n in {16, 24}, k in {1, 3, 5} and m over 1..1 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22368
// Runs m = 1, n = 8 for k in {1, 3, 5} with ks(3).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22384
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..1 with ks(3) and iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22405
// With m = 1 and ks(3), sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22423
// With m = 1 and ks(3), runs n in {16, 24} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22441
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..1 with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22462
// Runs m = 1, n = 8 for k in {1, 3, 5} with ks(3) and a_offset(7).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(7)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22479
// For k in {1, 3, 5}, runs zero_index 0 (the only value below 1) with ks(3) and a_offset(7).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(7)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22499
// Single m = 1, n = 8, k = 1 run with qmin(128).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22513
// Single m = 1, n = 8, k = 1 run with qmax(128).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22527
// Single m = 1, n = 8, k = 1 run with cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22541 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22542
22543
22544 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m = 1, n = 16, k = 1 run with no extra tester options.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22557
// Single m = 1, n = 16, k = 1 run with cn_stride(19).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .cn_stride(19)
    .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
          xnn_init_f32_minmax_avx_params);
}
22571
// At k = 1, sweeps n over 1..16 and m over 1..1 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22589
// At k = 1 and n = 16, sweeps m over 1..1 (a single pass) with iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22605
// At k = 1 and m = 1, sweeps n over 1..16 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22621
// Runs m = 1, n = 16 for every k in 2..9.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22636
// Sweeps k over 2..9, n over 1..16 and m over 1..1 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22656
// With m = 1, sweeps n over 17..31 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22673
// With m = 1 and cn_stride(19), sweeps n over 17..31 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22691
// Sweeps n over 17..31, k over {1, 3, 5} and m over 1..1 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22711
// With m = 1, runs n in {32, 48} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22728
// With m = 1 and cn_stride(19), runs n in {32, 48} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22746
// Runs n in {32, 48}, k in {1, 3, 5} and m over 1..1 with iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22766
// Runs m = 1, n = 16 for k in {1, 3, 5} with ks(3).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
22782
// Sweeps k over {1, 3, 5}, n over 1..16 and m over 1..1 with ks(3) and iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22803
// With m = 1 and ks(3), sweeps n over 17..31 and k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22821
// With m = 1 and ks(3), runs n in {32, 48} against k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
22839
// Sweeps k over {1, 3, 5}, n over 1..16 and m over 1..1 with cm_stride(19) and iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22860
// k in {1, 3, 5}; m = 1, n = 16, ks = 3, a_offset = 7.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1);
    tester.m(1).n(16).k(k_dim).ks(3).a_offset(7);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22877
// k in {1, 3, 5}, zero_index in [0, 0]; m = 1, n = 16, ks = 3, a_offset = 7.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1);
      tester.m(1).n(16).k(k_dim).ks(3).a_offset(7).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22897
// Single case: m = 1, n = 16, k = 1 with qmin = 128.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(1).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22911
// Single case: m = 1, n = 16, k = 1 with qmax = 128.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(1).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22925
// Single case: m = 1, n = 16, k = 1 with cm_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(1).cm_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22939 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22940
22941
22942 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 1, n = 16, k = 4.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(4);
  tester.m(1).n(16).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22955
// Single case: m = 1, n = 16, k = 4 with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(4);
  tester.m(1).n(16).k(4).cn_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22969
// n in [1, 16], m in [1, 1]; k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(4);
      tester.m(m_dim).n(n_dim).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22987
// m in [1, 1]; n = 16, k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(4);
    tester.m(m_dim).n(16).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23003
// n in [1, 16]; m = 1, k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(4);
    tester.m(1).n(n_dim).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23019
// k in [1, 3]; m = 1, n = 16.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(4);
    tester.m(1).n(16).k(k_dim);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23034
// k in [1, 3], n in [1, 16], m in [1, 1]; one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23054
// k in [5, 7]; m = 1, n = 16.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(4);
    tester.m(1).n(16).k(k_dim);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23069
// k in [5, 7], n in [1, 16], m in [1, 1]; one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23089
// k in {8, 12, ..., 40}; m = 1, n = 16.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(4);
    tester.m(1).n(16).k(k_dim);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23104
// k in {8, 12, ..., 40}, n in [1, 16], m in [1, 1]; one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23124
// n in [17, 31], k in {1, 6, 11, 16}; m = 1.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(4);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23141
// n in [17, 31], k in {1, 6, 11, 16}; m = 1, cn_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(4);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23159
// n in [17, 31], k in {1, 6, 11, 16}, m in [1, 1]; one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23179
// n in {32, 48}, k in {1, 6, 11, 16}; m = 1.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(4);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23196
// n in {32, 48}, k in {1, 6, 11, 16}; m = 1, cn_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(4);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23214
// n in {32, 48}, k in {1, 6, 11, 16}, m in [1, 1]; one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23234
// k in {1, 6, 11, 16}; m = 1, n = 16, ks = 3.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(4);
    tester.m(1).n(16).k(k_dim).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23250
// k in {1, 6, 11, 16}, n in [1, 16], m in [1, 1]; ks = 3, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23271
// n in [17, 31], k in {1, 6, 11, 16}; m = 1, ks = 3.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(4);
      tester.m(1).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23289
// n in {32, 48}, k in {1, 6, 11, 16}; m = 1, ks = 3.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(4);
      tester.m(1).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23307
// k in {1, 6, 11, 16}, n in [1, 16], m in [1, 1]; cm_stride = 19, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(19).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23328
// k in {1, 6, 11, 16}; m = 1, n = 16, ks = 3, a_offset = 23.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(4);
    tester.m(1).n(16).k(k_dim).ks(3).a_offset(23);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23345
// k in {1, 6, 11, 16}, zero_index in [0, 0]; m = 1, n = 16, ks = 3, a_offset = 23.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(4);
      tester.m(1).n(16).k(k_dim).ks(3).a_offset(23).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23365
// Single case: m = 1, n = 16, k = 4 with qmin = 128.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(4);
  tester.m(1).n(16).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23379
// Single case: m = 1, n = 16, k = 4 with qmax = 128.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(4);
  tester.m(1).n(16).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23393
// Single case: m = 1, n = 16, k = 4 with cm_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(4);
  tester.m(1).n(16).k(4).cm_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23407 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23408
23409
23410 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 3, n = 16, k = 1.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23423
// Single case: m = 3, n = 16, k = 1 with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(1).cn_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23437
// n in [1, 16], m in [1, 3]; k = 1, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23455
// m in [1, 3]; n = 16, k = 1, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(m_dim).n(16).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23471
// n in [1, 16]; m = 3, k = 1, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(n_dim).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23487
// k in [2, 9]; m = 3, n = 16.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k_dim);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23502
// k in [2, 9], n in [1, 16], m in [1, 3]; one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23522
// n in [17, 31], k in {1, 3, 5}; m = 3.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23539
// n in [17, 31], k in {1, 3, 5}; m = 3, cn_stride = 19.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23557
// n in [17, 31], k in {1, 3, 5}, m in [1, 3]; one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23577
// n in {32, 48}, k in {1, 3, 5}; m = 3.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23594
// n in {32, 48}, k in {1, 3, 5}; m = 3, cn_stride = 19.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23612
// n in {32, 48}, k in {1, 3, 5}, m in [1, 3]; one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23632
// k in {1, 3, 5}; m = 3, n = 16, ks = 3.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k_dim).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23648
// k in {1, 3, 5}, n in [1, 16], m in [1, 3]; ks = 3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23669
// n in [17, 31], k in {1, 3, 5}; m = 3, ks = 3.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23687
// n in {32, 48}, k in {1, 3, 5}; m = 3, ks = 3.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23705
// k in {1, 3, 5}, n in [1, 16], m in [1, 3]; cm_stride = 19, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(19).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23726
// k in {1, 3, 5}; m = 3, n = 16, ks = 3, a_offset = 17.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k_dim).ks(3).a_offset(17);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23743
// k in {1, 3, 5}, zero_index in [0, 2]; m = 3, n = 16, ks = 3, a_offset = 17.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(16).k(k_dim).ks(3).a_offset(17).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23763
// Single case: m = 3, n = 16, k = 1 with qmin = 128.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(1).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23777
// Single case: m = 3, n = 16, k = 1 with qmax = 128.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(1).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23791
// Single case: m = 3, n = 16, k = 1 with cm_stride = 19.
TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(1).cm_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23805 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23806
23807
23808 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 3, n = 16, k = 4.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(1).sr(4);
  tester.m(3).n(16).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23821
// Single case: m = 3, n = 16, k = 4 with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(1).sr(4);
  tester.m(3).n(16).k(4).cn_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23835
// n in [1, 16], m in [1, 3]; k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(4);
      tester.m(m_dim).n(n_dim).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23853
// m in [1, 3]; n = 16, k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(4);
    tester.m(m_dim).n(16).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23869
// n in [1, 16]; m = 3, k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(4);
    tester.m(3).n(n_dim).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23885
// k in [1, 3]; m = 3, n = 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(4);
    tester.m(3).n(16).k(k_dim);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23900
// k in [1, 3], n in [1, 16], m in [1, 3]; one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23920
// k in [5, 7]; m = 3, n = 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(4);
    tester.m(3).n(16).k(k_dim);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23935
// k in [5, 7], n in [1, 16], m in [1, 3]; one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23955
// k in {8, 12, ..., 40}; m = 3, n = 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(1).sr(4);
    tester.m(3).n(16).k(k_dim);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23970
// k in {8, 12, ..., 40}, n in [1, 16], m in [1, 3]; one iteration per case.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23990
// n in [17, 31], k in {1, 6, 11, 16}; m = 3.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(1).sr(4);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24007
// 3x16s4 FMA3-broadcast IGEMM, m=3, cn_stride=19:
// n in [17, 32) crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24025
// 3x16s4 FMA3-broadcast IGEMM: n in [17, 32), k = 1, 6, 11, 16, m in [1, 3];
// one iteration per (n, k, m) combination.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24045
// 3x16s4 FMA3-broadcast IGEMM, m=3: n = 32, 48 crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24062
// 3x16s4 FMA3-broadcast IGEMM, m=3, cn_stride=19:
// n = 32, 48 crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24080
// 3x16s4 FMA3-broadcast IGEMM: n = 32, 48, k = 1, 6, 11, 16, m in [1, 3];
// one iteration per (n, k, m) combination.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24100
// 3x16s4 FMA3-broadcast IGEMM, m=3, n=16, ks=3, with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(3).n(16).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24116
// 3x16s4 FMA3-broadcast IGEMM with ks=3: k = 1, 6, 11, 16, n in [1, 16],
// m in [1, 3]; one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24137
// 3x16s4 FMA3-broadcast IGEMM, m=3, ks=3:
// n in [17, 32) crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24155
// 3x16s4 FMA3-broadcast IGEMM, m=3, ks=3:
// n = 32, 48 crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24173
// 3x16s4 FMA3-broadcast IGEMM with cm_stride=19: k = 1, 6, 11, 16,
// n in [1, 16], m in [1, 3]; one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24194
// 3x16s4 FMA3-broadcast IGEMM, m=3, n=16, ks=3, a_offset=67, with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(3).n(16).k(cur_k)
        .ks(3)
        .a_offset(67)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24211
// 3x16s4 FMA3-broadcast IGEMM, m=3, n=16, ks=3, a_offset=67:
// k = 1, 6, 11, 16 crossed with zero_index = 0, 1, 2.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 3; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(3).n(16).k(cur_k)
          .ks(3)
          .a_offset(67)
          .zero_index(cur_mz)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24231
// 3x16s4 FMA3-broadcast IGEMM, m=3, n=16, k=4, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24245
// 3x16s4 FMA3-broadcast IGEMM, m=3, n=16, k=4, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24259
// 3x16s4 FMA3-broadcast IGEMM, m=3, n=16, k=4, with cm_stride set to 19.
TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(4)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24273 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24274
24275
24276 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x8 FMA3-broadcast IGEMM, m=4, n=8, k=1.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24289
// 4x8 FMA3-broadcast IGEMM, m=4, n=8, k=1, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24303
// 4x8 FMA3-broadcast IGEMM, k=1: n in [1, 8] crossed with m in [1, 4];
// one iteration per (n, m) combination.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24321
// 4x8 FMA3-broadcast IGEMM, n=8, k=1, with m in [1, 4]; one iteration per m.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24337
// 4x8 FMA3-broadcast IGEMM, m=4, k=1, with n in [1, 8]; one iteration per n.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24353
// 4x8 FMA3-broadcast IGEMM, m=4 and n=8, with k taking each value in [2, 10).
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24368
// 4x8 FMA3-broadcast IGEMM: k in [2, 10), n in [1, 8], m in [1, 4];
// one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24388
// 4x8 FMA3-broadcast IGEMM, m=4: n in [9, 16) crossed with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24405
// 4x8 FMA3-broadcast IGEMM, m=4, cn_stride=11:
// n in [9, 16) crossed with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24423
// 4x8 FMA3-broadcast IGEMM: n in [9, 16), k = 1, 3, 5, m in [1, 4];
// one iteration per (n, k, m) combination.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24443
// 4x8 FMA3-broadcast IGEMM, m=4: n = 16, 24 crossed with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24460
// 4x8 FMA3-broadcast IGEMM, m=4, cn_stride=11:
// n = 16, 24 crossed with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24478
// 4x8 FMA3-broadcast IGEMM: n = 16, 24, k = 1, 3, 5, m in [1, 4];
// one iteration per (n, k, m) combination.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24498
// 4x8 FMA3-broadcast IGEMM, m=4, n=8, ks=3, with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24514
// 4x8 FMA3-broadcast IGEMM with ks=3: k = 1, 3, 5, n in [1, 8], m in [1, 4];
// one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24535
// 4x8 FMA3-broadcast IGEMM, m=4, ks=3: n in [9, 16) crossed with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24553
// 4x8 FMA3-broadcast IGEMM, m=4, ks=3: n = 16, 24 crossed with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24571
// 4x8 FMA3-broadcast IGEMM with cm_stride=11: k = 1, 3, 5, n in [1, 8],
// m in [1, 4]; one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24592
// 4x8 FMA3-broadcast IGEMM, m=4, n=8, ks=3, a_offset=23, with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24609
// 4x8 FMA3-broadcast IGEMM, m=4, n=8, ks=3, a_offset=23:
// k = 1, 3, 5 crossed with zero_index = 0, 1, 2, 3.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 4; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(cur_k)
          .ks(3)
          .a_offset(23)
          .zero_index(cur_mz)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24629
// 4x8 FMA3-broadcast IGEMM, m=4, n=8, k=1, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24643
// 4x8 FMA3-broadcast IGEMM, m=4, n=8, k=1, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24657
// 4x8 FMA3-broadcast IGEMM, m=4, n=8, k=1, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24671 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24672
24673
24674 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x16s4 FMA3-broadcast IGEMM, m=4, n=16, k=4.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24687
// 4x16s4 FMA3-broadcast IGEMM, m=4, n=16, k=4, with cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
24701
// 4x16s4 FMA3-broadcast IGEMM, k=4: n in [1, 16] crossed with m in [1, 4];
// one iteration per (n, m) combination.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24719
// 4x16s4 FMA3-broadcast IGEMM, n=16, k=4, with m in [1, 4]; one iteration per m.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(cur_m).n(16).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24735
// 4x16s4 FMA3-broadcast IGEMM, m=4, k=4, with n in [1, 16]; one iteration per n.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24751
// 4x16s4 FMA3-broadcast IGEMM, m=4 and n=16, with k taking each value in [1, 4).
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24766
// 4x16s4 FMA3-broadcast IGEMM: k in [1, 4), n in [1, 16], m in [1, 4];
// one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24786
// 4x16s4 FMA3-broadcast IGEMM, m=4 and n=16, with k taking each value in [5, 8).
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24801
// 4x16s4 FMA3-broadcast IGEMM: k in [5, 8), n in [1, 16], m in [1, 4];
// one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24821
// 4x16s4 FMA3-broadcast IGEMM, m=4 and n=16, with k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24836
// 4x16s4 FMA3-broadcast IGEMM: k = 8, 12, ..., 40, n in [1, 16], m in [1, 4];
// one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24856
// 4x16s4 FMA3-broadcast IGEMM, m=4: n in [17, 32) crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24873
// 4x16s4 FMA3-broadcast IGEMM, m=4, cn_stride=19:
// n in [17, 32) crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24891
// 4x16s4 FMA3-broadcast IGEMM: n in [17, 32), k = 1, 6, 11, 16, m in [1, 4];
// one iteration per (n, k, m) combination.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24911
// 4x16s4 FMA3-broadcast IGEMM, m=4: n = 32, 48 crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24928
// 4x16s4 FMA3-broadcast IGEMM, m=4, cn_stride=19:
// n = 32, 48 crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
24946
// 4x16s4 FMA3-broadcast IGEMM: n = 32, 48, k = 1, 6, 11, 16, m in [1, 4];
// one iteration per (n, k, m) combination.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
24966
// 4x16s4 FMA3-broadcast IGEMM, m=4, n=16, ks=3, with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
24982
// 4x16s4 FMA3-broadcast IGEMM with ks=3: k = 1, 6, 11, 16, n in [1, 16],
// m in [1, 4]; one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25003
// 4x16s4 FMA3-broadcast IGEMM, m=4, ks=3:
// n in [17, 32) crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25021
// 4x16s4 FMA3-broadcast IGEMM, m=4, ks=3:
// n = 32, 48 crossed with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25039
// 4x16s4 FMA3-broadcast IGEMM with cm_stride=19: k = 1, 6, 11, 16,
// n in [1, 16], m in [1, 4]; one iteration per (k, n, m) combination.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25060
// 4x16s4 FMA3-broadcast IGEMM, m=4, n=16, ks=3, a_offset=83, with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(cur_k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25077
// 4x16s4 FMA3-broadcast IGEMM, m=4, n=16, ks=3, a_offset=83:
// k = 1, 6, 11, 16 crossed with zero_index = 0, 1, 2, 3.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 4; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(16).k(cur_k)
          .ks(3)
          .a_offset(83)
          .zero_index(cur_mz)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25097
// 4x16s4 FMA3-broadcast IGEMM, m=4, n=16, k=4, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25111
// 4x16s4 FMA3-broadcast IGEMM, m=4, n=16, k=4, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25125
// 4x16s4 FMA3-broadcast IGEMM, m=4, n=16, k=4, with cm_stride set to 19.
TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25139 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25140
25141
25142 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 5x8 FMA3 broadcast IGEMM: full 5x8 tile with k = 1.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25155
// 5x8 FMA3 broadcast IGEMM: full 5x8 tile, k = 1, with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25169
// 5x8 FMA3 broadcast IGEMM: k = 1 over every (n, m) with n in [1, 8] and
// m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25187
// 5x8 FMA3 broadcast IGEMM: k = 1, n = 8, with m swept over [1, 5]
// (one iteration each).
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25203
// 5x8 FMA3 broadcast IGEMM: k = 1, m = 5, with n swept over [1, 8]
// (one iteration each).
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25219
// 5x8 FMA3 broadcast IGEMM: full 5x8 tile with k swept over [2, 9].
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25234
// 5x8 FMA3 broadcast IGEMM: k in [2, 9] crossed with n in [1, 8] and
// m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25254
// 5x8 FMA3 broadcast IGEMM: m = 5 with n in [9, 15] and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25271
// 5x8 FMA3 broadcast IGEMM: m = 5 with n in [9, 15], k in {1, 3, 5} and
// cn_stride = 11.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25289
// 5x8 FMA3 broadcast IGEMM: n in [9, 15], k in {1, 3, 5} and m in [1, 5],
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25309
// 5x8 FMA3 broadcast IGEMM: m = 5 with n in {16, 24} and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25326
// 5x8 FMA3 broadcast IGEMM: m = 5 with n in {16, 24}, k in {1, 3, 5} and
// cn_stride = 11.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25344
// 5x8 FMA3 broadcast IGEMM: n in {16, 24}, k in {1, 3, 5} and m in [1, 5],
// one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25364
// 5x8 FMA3 broadcast IGEMM: full 5x8 tile with ks = 3 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25380
// 5x8 FMA3 broadcast IGEMM: ks = 3 with k in {1, 3, 5}, n in [1, 8] and
// m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25401
// 5x8 FMA3 broadcast IGEMM: ks = 3, m = 5, with n in [9, 15] and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25419
// 5x8 FMA3 broadcast IGEMM: ks = 3, m = 5, with n in {16, 24} and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25437
// 5x8 FMA3 broadcast IGEMM: cm_stride = 11 with k in {1, 3, 5}, n in [1, 8]
// and m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25458
// 5x8 FMA3 broadcast IGEMM: full 5x8 tile with ks = 3 and a_offset = 29,
// repeated for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .ks(3)
        .a_offset(29)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25475
// 5x8 FMA3 broadcast IGEMM: ks = 3 and a_offset = 29 on a full 5x8 tile,
// passing each zero_index in [0, 5) for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(kc)
          .ks(3)
          .a_offset(29)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25495
// 5x8 FMA3 broadcast IGEMM: full 5x8 tile, k = 1, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25509
// 5x8 FMA3 broadcast IGEMM: full 5x8 tile, k = 1, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25523
// 5x8 FMA3 broadcast IGEMM: full 5x8 tile, k = 1, with cm_stride = 11.
TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25537 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25538
25539
25540 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile with k = 4.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25553
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile, k = 4, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(4)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25567
// 5x16s4 FMA3 broadcast IGEMM: k = 4 over every (n, m) with n in [1, 16] and
// m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25585
// 5x16s4 FMA3 broadcast IGEMM: k = 4, n = 16, with m swept over [1, 5]
// (one iteration each).
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(mc).n(16).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25601
// 5x16s4 FMA3 broadcast IGEMM: k = 4, m = 5, with n swept over [1, 16]
// (one iteration each).
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25617
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile with k swept over [1, 3].
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(16).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25632
// 5x16s4 FMA3 broadcast IGEMM: k in [1, 3] crossed with n in [1, 16] and
// m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25652
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile with k swept over [5, 7].
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(16).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25667
// 5x16s4 FMA3 broadcast IGEMM: k in [5, 7] crossed with n in [1, 16] and
// m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25687
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile with k = 8, 12, ..., 40
// (step of 4).
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(16).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25702
// 5x16s4 FMA3 broadcast IGEMM: k = 8, 12, ..., 40 crossed with n in [1, 16]
// and m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25722
// 5x16s4 FMA3 broadcast IGEMM: m = 5 with n in [17, 31] and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25739
// 5x16s4 FMA3 broadcast IGEMM: m = 5 with n in [17, 31],
// k in {1, 6, 11, 16} and cn_stride = 19.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25757
// 5x16s4 FMA3 broadcast IGEMM: n in [17, 31], k in {1, 6, 11, 16} and
// m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25777
// 5x16s4 FMA3 broadcast IGEMM: m = 5 with n in {32, 48} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25794
// 5x16s4 FMA3 broadcast IGEMM: m = 5 with n in {32, 48},
// k in {1, 6, 11, 16} and cn_stride = 19.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25812
// 5x16s4 FMA3 broadcast IGEMM: n in {32, 48}, k in {1, 6, 11, 16} and
// m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25832
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile with ks = 3 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(16).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25848
// 5x16s4 FMA3 broadcast IGEMM: ks = 3 with k in {1, 6, 11, 16}, n in [1, 16]
// and m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25869
// 5x16s4 FMA3 broadcast IGEMM: ks = 3, m = 5, with n in [17, 31] and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25887
// 5x16s4 FMA3 broadcast IGEMM: ks = 3, m = 5, with n in {32, 48} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25905
// 5x16s4 FMA3 broadcast IGEMM: cm_stride = 19 with k in {1, 6, 11, 16},
// n in [1, 16] and m in [1, 5], one iteration per combination.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
25926
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile with ks = 3 and
// a_offset = 103, repeated for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(16).k(kc)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
25943
// 5x16s4 FMA3 broadcast IGEMM: ks = 3 and a_offset = 103 on a full 5x16
// tile, passing each zero_index in [0, 5) for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(5).n(16).k(kc)
          .ks(3)
          .a_offset(103)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
25963
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile, k = 4, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25977
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile, k = 4, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
25991
// 5x16s4 FMA3 broadcast IGEMM: full 5x16 tile, k = 4, with cm_stride = 19.
TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(4)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
26005 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26006
26007
26008 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x16 AVX512F broadcast IGEMM: full 1x16 tile with k = 1.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
26021
// 1x16 AVX512F broadcast IGEMM: full 1x16 tile, k = 1, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
26035
// 1x16 AVX512F broadcast IGEMM: k = 1 with n in [1, 16]; the m loop runs
// from 1 to 1, so it executes once per n. One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
26053
// 1x16 AVX512F broadcast IGEMM: k = 1, n = 16; the m loop runs from 1 to 1,
// so the tester executes a single time (one iteration).
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(mc).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
26069
// 1x16 AVX512F broadcast IGEMM: k = 1, m = 1, with n swept over [1, 16]
// (one iteration each).
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
26085
// 1x16 AVX512F broadcast IGEMM: full 1x16 tile with k swept over [2, 9].
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
26100
// 1x16 AVX512F broadcast IGEMM: k in [2, 9] crossed with n in [1, 16]; the
// m loop runs from 1 to 1. One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26120
// 1x16 AVX512F broadcast IGEMM: m = 1 with n in [17, 31] and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
26137
// 1x16 AVX512F broadcast IGEMM: m = 1 with n in [17, 31], k in {1, 3, 5}
// and cn_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
26155
// 1x16 AVX512F broadcast IGEMM: n in [17, 31] and k in {1, 3, 5}; the m loop
// runs from 1 to 1. One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26175
// 1x16 AVX512F broadcast IGEMM: m = 1 with n in {32, 48} and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
26192
// 1x16 AVX512F broadcast IGEMM: m = 1 with n in {32, 48}, k in {1, 3, 5}
// and cn_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
26210
// 1x16 AVX512F broadcast IGEMM: n in {32, 48} and k in {1, 3, 5}; the m loop
// runs from 1 to 1. One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26230
// 1x16 AVX512F broadcast IGEMM: full 1x16 tile with ks = 3 and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
26246
// 1x16 AVX512F broadcast IGEMM: ks = 3 with k in {1, 3, 5} and n in [1, 16];
// the m loop runs from 1 to 1. One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26267
// 1x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 1, with ks = 3.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26285
// 1x16 kernel: n in {32, 48}, k in {1, 3, 5}, m = 1, with ks = 3.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26303
// 1x16 kernel: k in {1, 3, 5}, n in [1, 16], m in [1, 1], cm_stride = 19, iterations = 1.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26324
// 1x16 kernel: full 1x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 7.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26341
// 1x16 kernel: full 1x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 7,
// and zero_index taking every value in [0, 1).
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(16).k(k_size)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26361
// 1x16 kernel: full 1x16 tile, k = 1, with qmin = 128.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26375
// 1x16 kernel: full 1x16 tile, k = 1, with qmax = 128.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26389
// 1x16 kernel: full 1x16 tile, k = 1, with cm_stride = 19.
TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26403 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26404
26405
26406 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 6x16 kernel: full 6x16 tile with k = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26419
// 6x16 kernel: full 6x16 tile, k = 1, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26433
// 6x16 kernel: k = 1, n in [1, 16], m in [1, 6], iterations = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26451
// 6x16 kernel: k = 1, n = 16, m in [1, 6], iterations = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26467
// 6x16 kernel: k = 1, m = 6, n in [1, 16], iterations = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26483
// 6x16 kernel: full 6x16 tile, k in [2, 9].
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26498
// 6x16 kernel: k in [2, 9], n in [1, 16], m in [1, 6], iterations = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26518
// 6x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 6.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26535
// 6x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 6, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26553
// 6x16 kernel: n in [17, 31], k in {1, 3, 5}, m in [1, 6], iterations = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26573
// 6x16 kernel: n in {32, 48}, k in {1, 3, 5}, m = 6.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26590
// 6x16 kernel: n in {32, 48}, k in {1, 3, 5}, m = 6, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26608
// 6x16 kernel: n in {32, 48}, k in {1, 3, 5}, m in [1, 6], iterations = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26628
// 6x16 kernel: full 6x16 tile, k in {1, 3, 5}, with ks = 3.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26644
// 6x16 kernel: k in {1, 3, 5}, n in [1, 16], m in [1, 6], ks = 3, iterations = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26665
// 6x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 6, with ks = 3.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26683
// 6x16 kernel: n in {32, 48}, k in {1, 3, 5}, m = 6, with ks = 3.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26701
// 6x16 kernel: k in {1, 3, 5}, n in [1, 16], m in [1, 6], cm_stride = 19, iterations = 1.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26722
// 6x16 kernel: full 6x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 37.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .ks(3)
        .a_offset(37)
        .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26739
// 6x16 kernel: full 6x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 37,
// and zero_index taking every value in [0, 6).
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(16).k(k_size)
          .ks(3)
          .a_offset(37)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26759
// 6x16 kernel: full 6x16 tile, k = 1, with qmin = 128.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26773
// 6x16 kernel: full 6x16 tile, k = 1, with qmax = 128.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26787
// 6x16 kernel: full 6x16 tile, k = 1, with cm_stride = 19.
TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26801 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26802
26803
26804 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 7x16 kernel: full 7x16 tile with k = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26817
// 7x16 kernel: full 7x16 tile, k = 1, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
26831
// 7x16 kernel: k = 1, n in [1, 16], m in [1, 7], iterations = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
      GemmMicrokernelTester()
          .mr(7).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26849
// 7x16 kernel: k = 1, n = 16, m in [1, 7], iterations = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
    GemmMicrokernelTester()
        .mr(7).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26865
// 7x16 kernel: k = 1, m = 7, n in [1, 16], iterations = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(7).nr(16).kr(1).sr(1)
        .m(7).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26881
// 7x16 kernel: full 7x16 tile, k in [2, 9].
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(7).nr(16).kr(1).sr(1)
        .m(7).n(16).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
26896
// 7x16 kernel: k in [2, 9], n in [1, 16], m in [1, 7], iterations = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
            .mr(7).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26916
// 7x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 7.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(7).nr(16).kr(1).sr(1)
          .m(7).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26933
// 7x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 7, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(7).nr(16).kr(1).sr(1)
          .m(7).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26951
// 7x16 kernel: n in [17, 31], k in {1, 3, 5}, m in [1, 7], iterations = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
            .mr(7).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
26971
// 7x16 kernel: n in {32, 48}, k in {1, 3, 5}, m = 7.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(7).nr(16).kr(1).sr(1)
          .m(7).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
26988
// 7x16 kernel: n in {32, 48}, k in {1, 3, 5}, m = 7, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(7).nr(16).kr(1).sr(1)
          .m(7).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
27006
// 7x16 kernel: n in {32, 48}, k in {1, 3, 5}, m in [1, 7], iterations = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
            .mr(7).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
27026
// 7x16 kernel: full 7x16 tile, k in {1, 3, 5}, with ks = 3.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(7).nr(16).kr(1).sr(1)
        .m(7).n(16).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
27042
// 7x16 kernel: k in {1, 3, 5}, n in [1, 16], m in [1, 7], ks = 3, iterations = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
            .mr(7).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
27063
// 7x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 7, with ks = 3.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(7).nr(16).kr(1).sr(1)
          .m(7).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
27081
// 7x16 kernel: n in {32, 48}, k in {1, 3, 5}, m = 7, with ks = 3.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(7).nr(16).kr(1).sr(1)
          .m(7).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
27099
// 7x16 kernel: k in {1, 3, 5}, n in [1, 16], m in [1, 7], cm_stride = 19, iterations = 1.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 7; ++m_size) {
        GemmMicrokernelTester()
            .mr(7).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
27120
// 7x16 kernel: full 7x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 37.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(7).nr(16).kr(1).sr(1)
        .m(7).n(16).k(k_size)
        .ks(3)
        .a_offset(37)
        .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
27137
// 7x16 kernel: full 7x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 37,
// and zero_index taking every value in [0, 7).
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 7; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(7).nr(16).kr(1).sr(1)
          .m(7).n(16).k(k_size)
          .ks(3)
          .a_offset(37)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
27157
// 7x16 kernel: full 7x16 tile, k = 1, with qmin = 128.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
27171
// 7x16 kernel: full 7x16 tile, k = 1, with qmax = 128.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
27185
// 7x16 kernel: full 7x16 tile, k = 1, with cm_stride = 19.
TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
27199 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27200
27201
27202 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 8x16 kernel: full 8x16 tile with k = 1.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(8).nr(16).kr(1).sr(1)
      .m(8).n(16).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
27215
// 8x16 kernel: full 8x16 tile, k = 1, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
      .mr(8).nr(16).kr(1).sr(1)
      .m(8).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
            xnn_init_f32_minmax_scalar_params);
}
27229
// 8x16 kernel: k = 1, n in [1, 16], m in [1, 8], iterations = 1.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
27247
// 8x16 kernel: k = 1, n = 16, m in [1, 8], iterations = 1.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
27263
// 8x16 kernel: k = 1, m = 8, n in [1, 16], iterations = 1.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(1).sr(1)
        .m(8).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
27279
// 8x16 kernel: full 8x16 tile, k in [2, 9].
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(1).sr(1)
        .m(8).n(16).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
              xnn_init_f32_minmax_scalar_params);
  }
}
27294
// 8x16 kernel: k in [2, 9], n in [1, 16], m in [1, 8], iterations = 1.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
27314
// 8x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 8.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(1).sr(1)
          .m(8).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
27331
// 8x16 kernel: n in [17, 31], k in {1, 3, 5}, m = 8, with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(1).sr(1)
          .m(8).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
27349
// 8x16 kernel: n in [17, 31], k in {1, 3, 5}, m in [1, 8], iterations = 1.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
27369
// Runs m = 8 with n = 32 and n = 48 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_cols = 32; n_cols <= 48; n_cols += 16) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(1).sr(1)
        .m(8).n(n_cols).k(k_len)
        .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
27386
// Runs m = 8 with n = 32 and n = 48 for k = 1, 3, 5, with cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_cols = 32; n_cols <= 48; n_cols += 16) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(1).sr(1)
        .m(8).n(n_cols).k(k_len)
        .cn_stride(19)
        .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
27404
// Runs n = 32 and n = 48, k = 1, 3, 5 and m in [1, 8], one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_cols = 32; n_cols <= 48; n_cols += 16) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      for (uint32_t m_rows = 1; m_rows <= 8; ++m_rows) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
27424
// Runs the full 8x16 tile with ks = 3 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(1).sr(1)
      .m(8).n(16).k(k_len)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
27440
// Runs ks = 3 for k = 1, 3, 5 over every (m, n) sub-tile with m in [1, 8] and
// n in [1, 16], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    for (uint32_t n_cols = 1; n_cols <= 16; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 8; ++m_rows) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
27461
// Runs m = 8 and ks = 3 with n in [17, 32) for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_cols = 17; n_cols < 32; ++n_cols) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(1).sr(1)
        .m(8).n(n_cols).k(k_len)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
27479
// Runs m = 8 and ks = 3 with n = 32 and n = 48 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_cols = 32; n_cols <= 48; n_cols += 16) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(1).sr(1)
        .m(8).n(n_cols).k(k_len)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
27497
// Runs cm_stride = 19 for k = 1, 3, 5 over every (m, n) sub-tile with
// m in [1, 8] and n in [1, 16], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    for (uint32_t n_cols = 1; n_cols <= 16; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 8; ++m_rows) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
27518
// Runs the full 8x16 tile with ks = 3 and a_offset = 43 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(1).sr(1)
      .m(8).n(16).k(k_len)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
27535
// Runs the full 8x16 tile with ks = 3 and a_offset = 43 for k = 1, 3, 5,
// passing each zero_index in [0, 8) in turn.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 8; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(1).sr(1)
        .m(8).n(16).k(k_len)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
27555
// Runs the full 8x16 tile at k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(1).sr(1)
    .m(8).n(16).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
27569
// Runs the full 8x16 tile at k = 1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(1).sr(1)
    .m(8).n(16).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
27583
// Runs the full 8x16 tile at k = 1 with cm_stride set to 19.
TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(1).sr(1)
    .m(8).n(16).k(1)
    .cm_stride(19)
    .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
27597 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27598
27599
27600 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 1x8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27612
// Runs the full 1x8 tile at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27625
// Runs k = 4 over every (m, n) sub-tile with m in [1, 1] and n in [1, 8],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
    for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_rows).n(n_cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27642
// Runs k = 4 and n = 8 for m in [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27657
// Runs k = 4 and m = 1 for n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_cols).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27672
// Runs the full 1x8 tile for k in [1, 4).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t k_len = 1; k_len < 4; ++k_len) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_len)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27686
// Runs k in [1, 4) over every (m, n) sub-tile with m in [1, 1] and
// n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t k_len = 1; k_len < 4; ++k_len) {
    for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27705
// Runs the full 1x8 tile for k in [5, 8).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t k_len = 5; k_len < 8; ++k_len) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_len)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27719
// Runs k in [5, 8) over every (m, n) sub-tile with m in [1, 1] and
// n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t k_len = 5; k_len < 8; ++k_len) {
    for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27738
// Runs the full 1x8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t k_len = 8; k_len <= 40; k_len += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_len)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27752
// Runs k = 8, 12, ..., 40 over every (m, n) sub-tile with m in [1, 1] and
// n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t k_len = 8; k_len <= 40; k_len += 4) {
    for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27771
// Runs m = 1 with n in [9, 16) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t n_cols = 9; n_cols < 16; ++n_cols) {
    for (size_t k_len = 1; k_len <= 20; k_len += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27787
// Runs m = 1 with n in [9, 16) for k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_cols = 9; n_cols < 16; ++n_cols) {
    for (size_t k_len = 1; k_len <= 20; k_len += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27804
// Runs n in [9, 16), k = 1, 6, 11, 16 and m in [1, 1], one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_cols = 9; n_cols < 16; ++n_cols) {
    for (size_t k_len = 1; k_len <= 20; k_len += 5) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27823
// Runs m = 1 with n = 16 and n = 24 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t n_cols = 16; n_cols <= 24; n_cols += 8) {
    for (size_t k_len = 1; k_len <= 20; k_len += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27839
// Runs m = 1 with n = 16 and n = 24 for k = 1, 6, 11, 16, with cn_stride set
// to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_cols = 16; n_cols <= 24; n_cols += 8) {
    for (size_t k_len = 1; k_len <= 20; k_len += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27856
// Runs n = 16 and n = 24, k = 1, 6, 11, 16 and m in [1, 1], one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t n_cols = 16; n_cols <= 24; n_cols += 8) {
    for (size_t k_len = 1; k_len <= 20; k_len += 5) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27875
// Runs the full 1x8 tile with ks = 3 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, small_kernel) {
  for (size_t k_len = 1; k_len <= 20; k_len += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_len)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27890
// Runs ks = 3 for k = 1, 6, 11, 16 over every (m, n) sub-tile with
// m in [1, 1] and n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
  for (size_t k_len = 1; k_len <= 20; k_len += 5) {
    for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27910
// Runs m = 1 and ks = 3 with n in [9, 16) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_cols = 9; n_cols < 16; ++n_cols) {
    for (size_t k_len = 1; k_len <= 20; k_len += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27927
// Runs m = 1 and ks = 3 with n = 16 and n = 24 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_cols = 16; n_cols <= 24; n_cols += 8) {
    for (size_t k_len = 1; k_len <= 20; k_len += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27944
// Runs cm_stride = 11 for k = 1, 6, 11, 16 over every (m, n) sub-tile with
// m in [1, 1] and n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t k_len = 1; k_len <= 20; k_len += 5) {
    for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27964
// Runs the full 1x8 tile with ks = 3 and a_offset = 23 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, a_offset) {
  for (size_t k_len = 1; k_len <= 20; k_len += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_len)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27980
// Runs the full 1x8 tile with ks = 3 and a_offset = 23 for k = 1, 6, 11, 16,
// passing each zero_index in [0, 1) in turn.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, zero) {
  for (size_t k_len = 1; k_len <= 20; k_len += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_len)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27999
// Runs the full 1x8 tile at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28012
// Runs the full 1x8 tile at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28025
// Runs the full 1x8 tile at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28038 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28039
28040
28041 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 1x8 tile at k = 1.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28053
// Runs the full 1x8 tile at k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28066
// Runs k = 1 over every (m, n) sub-tile with m in [1, 1] and n in [1, 8],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
    for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_rows).n(n_cols).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28083
// Runs k = 1 and n = 8 for m in [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_rows).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28098
// Runs k = 1 and m = 1 for n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_cols).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28113
// Runs the full 1x8 tile for k in [2, 10).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t k_len = 2; k_len < 10; ++k_len) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_len)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28127
// Runs k in [2, 10) over every (m, n) sub-tile with m in [1, 1] and
// n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_len = 2; k_len < 10; ++k_len) {
    for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28146
// Runs m = 1 with n in [9, 16) for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t n_cols = 9; n_cols < 16; ++n_cols) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28162
// Runs m = 1 with n in [9, 16) for k = 1, 3, 5, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_cols = 9; n_cols < 16; ++n_cols) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28179
// Runs n in [9, 16), k = 1, 3, 5 and m in [1, 1], one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_cols = 9; n_cols < 16; ++n_cols) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28198
// Runs m = 1 with n = 16 and n = 24 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t n_cols = 16; n_cols <= 24; n_cols += 8) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28214
// Runs m = 1 with n = 16 and n = 24 for k = 1, 3, 5, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_cols = 16; n_cols <= 24; n_cols += 8) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28231
// Runs n = 16 and n = 24, k = 1, 3, 5 and m in [1, 1], one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_cols = 16; n_cols <= 24; n_cols += 8) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28250
// Runs the full 1x8 tile with ks = 3 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_len)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28265
// Runs ks = 3 for k = 1, 3, 5 over every (m, n) sub-tile with m in [1, 1] and
// n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28285
// Runs m = 1 and ks = 3 with n in [9, 16) for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_cols = 9; n_cols < 16; ++n_cols) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28302
// Runs m = 1 and ks = 3 with n = 16 and n = 24 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_cols = 16; n_cols <= 24; n_cols += 8) {
    for (size_t k_len = 1; k_len <= 5; k_len += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_cols).k(k_len)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28319
// Runs cm_stride = 11 for k = 1, 3, 5 over every (m, n) sub-tile with
// m in [1, 1] and n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
      for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_rows).n(n_cols).k(k_len)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28339
// Runs the full 1x8 tile with ks = 3 and a_offset = 7 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_len)
      .ks(3)
      .a_offset(7)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28355
// Runs the full 1x8 tile with ks = 3 and a_offset = 7 for k = 1, 3, 5,
// passing each zero_index in [0, 1) in turn.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, zero) {
  for (size_t k_len = 1; k_len <= 5; k_len += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_len)
        .ks(3)
        .a_offset(7)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28374
// Runs the full 1x8 tile at k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28387
// Runs the full 1x8 tile at k = 1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28400
// Runs the full 1x8 tile at k = 1 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28413 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28414
28415
28416 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 1x8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28428
// Runs the full 1x8 tile at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28441
// Runs k = 4 over every (m, n) sub-tile with m in [1, 1] and n in [1, 8],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_cols = 1; n_cols <= 8; ++n_cols) {
    for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_rows).n(n_cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28458
// Runs k = 4 and n = 8 for m in [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_rows = 1; m_rows <= 1; ++m_rows) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28473
// Sweeps n over [1, 8] with m == mr and k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(cur_n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28488
// Sweeps k over 1..3 (below 4) on a full tile (m == mr, n == nr).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28502
// Sweeps k over 1..3, n over [1, 8] and m over [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28521
// Sweeps k over 5..7 (above 4) on a full tile (m == mr, n == nr).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28535
// Sweeps k over 5..7, n over [1, 8] and m over [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28554
// Sweeps k over the multiples of 4 from 8 to 40 on a full tile (m == mr, n == nr).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28568
// Sweeps k over multiples of 4 in [8, 40], n over [1, 8] and m over [1, 1], one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28587
// Sweeps n over 9..15 (above nr) and k over 1, 6, 11, 16 with m == mr.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28603
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m == mr and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28620
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28639
// Sweeps n over 16 and 24 (multiples of nr) and k over 1, 6, 11, 16 with m == mr.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28655
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m == mr and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28672
// Sweeps n over 16 and 24, k over 1, 6, 11, 16 and m over [1, 1], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28691
// Sweeps k over 1, 6, 11, 16 on a full tile (m == mr, n == nr) with ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28706
// Sweeps k over 1, 6, 11, 16, n over [1, 8] and m over [1, 1] with ks = 3, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28726
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m == mr and ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28743
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m == mr and ks set to 3.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28760
// Sweeps k over 1, 6, 11, 16, n over [1, 8] and m over [1, 1] with cm_stride = 11, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28780
// Sweeps k over 1, 6, 11, 16 on a full tile with ks = 3 and a_offset set to 23.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28796
// Sweeps k over 1, 6, 11, 16 and zero_index over [0, 1) with ks = 3 and a_offset = 23.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .a_offset(23)
        .zero_index(cur_mz)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28815
// Full tile (m == mr, n == nr) at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28828
// Full tile (m == mr, n == nr) at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28841
// Full tile (m == mr, n == nr) at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28854 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28855
28856
28857 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on a full tile (m == mr, n == nr) with k = 1.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28869
// Full tile (m == mr, n == nr) at k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28882
// Sweeps n over [1, 8] and m over [1, 3] at k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28899
// Sweeps m over [1, 3] with n == nr and k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(cur_m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28914
// Sweeps n over [1, 8] with m == mr and k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(cur_n).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28929
// Sweeps k over 2..9 (above 1) on a full tile (m == mr, n == nr).
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28943
// Sweeps k over 2..9, n over [1, 8] and m over [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28962
// Sweeps n over 9..15 (above nr) and k over 1, 3, 5 with m == mr.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28978
// Sweeps n over 9..15 and k over 1, 3, 5 with m == mr and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28995
// Sweeps n over 9..15, k over 1, 3, 5 and m over [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29014
// Sweeps n over 16 and 24 (multiples of nr) and k over 1, 3, 5 with m == mr.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29030
// Sweeps n over 16 and 24 and k over 1, 3, 5 with m == mr and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29047
// Sweeps n over 16 and 24, k over 1, 3, 5 and m over [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29066
// Sweeps k over 1, 3, 5 on a full tile (m == mr, n == nr) with ks set to 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(cur_k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29081
// Sweeps k over 1, 3, 5, n over [1, 8] and m over [1, 3] with ks = 3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29101
// Sweeps n over 9..15 and k over 1, 3, 5 with m == mr and ks set to 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29118
// Sweeps n over 16 and 24 and k over 1, 3, 5 with m == mr and ks set to 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29135
// Sweeps k over 1, 3, 5, n over [1, 8] and m over [1, 3] with cm_stride = 11, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29155
// Sweeps k over 1, 3, 5 on a full tile with ks = 3 and a_offset set to 17.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(cur_k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29171
// Sweeps k over 1, 3, 5 and zero_index over [0, 3) with ks = 3 and a_offset = 17.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 3; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(cur_k)
        .ks(3)
        .a_offset(17)
        .zero_index(cur_mz)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29190
// Full tile (m == mr, n == nr) at k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
29203
// Full tile (m == mr, n == nr) at k = 1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
29216
// Full tile (m == mr, n == nr) at k = 1 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
29229 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29230
29231
29232 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on a full tile (m == mr, n == nr) with k = 4.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
29244
// Full tile (m == mr, n == nr) at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
29257
// Sweeps n over [1, 8] and m over [1, 3] at k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(cur_m).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29274
// Sweeps m over [1, 3] with n == nr and k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(cur_m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29289
// Sweeps n over [1, 8] with m == mr and k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(cur_n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29304
// Sweeps k over 1..3 (below 4) on a full tile (m == mr, n == nr).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29318
// Sweeps k over 1..3, n over [1, 8] and m over [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29337
// Sweeps k over 5..7 (above 4) on a full tile (m == mr, n == nr).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29351
// Sweeps k over 5..7, n over [1, 8] and m over [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29370
// Sweeps k over the multiples of 4 from 8 to 40 on a full tile (m == mr, n == nr).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29384
// Sweeps k over multiples of 4 in [8, 40], n over [1, 8] and m over [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29403
// Sweeps n over 9..15 (above nr) and k over 1, 6, 11, 16 with m == mr.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29419
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m == mr and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29436
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29455
// Sweeps n over 16 and 24 (multiples of nr) and k over 1, 6, 11, 16 with m == mr.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29471
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m == mr and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29488
// Sweeps n over 16 and 24, k over 1, 6, 11, 16 and m over [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29507
// Sweeps k over 1, 6, 11, 16 on a full tile (m == mr, n == nr) with ks set to 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(cur_k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29522
// Sweeps k over 1, 6, 11, 16, n over [1, 8] and m over [1, 3] with ks = 3, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29542
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m == mr and ks set to 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29559
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m == mr and ks set to 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29576
// Sweeps k over 1, 6, 11, 16, n over [1, 8] and m over [1, 3] with cm_stride = 11, one iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29596
// Full 3x8 tile with ks(3) and a_offset(67), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
29612
// Full 3x8 tile with ks(3) and a_offset(67); zero_index is swept over
// 0, 1, 2 for each k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(67)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29631
// Full 3x8 tile at k == 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
29644
// Full 3x8 tile at k == 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
29657
// Full 3x8 tile at k == 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
29670 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29671
29672
29673 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 3x8 tile (m == mr, n == nr) at k == 4.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
29685
// Full 3x8 tile at k == 4 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
29698
// k == 4 with every n in [1, 8] and m in [1, 3]; each configuration is run
// with iterations(1).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29715
// k == 4 and n == 8 with m swept over [1, 3]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
29730
// k == 4 and m == 3 with n swept over [1, 8]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
29745
// Full 3x8 tile with k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
29759
// k = 1, 2, 3 with every n in [1, 8] and m in [1, 3]; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29778
// Full 3x8 tile with k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
29792
// k = 5, 6, 7 with every n in [1, 8] and m in [1, 3]; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29811
// Full 3x8 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
29825
// k = 8, 12, ..., 40 with every n in [1, 8] and m in [1, 3]; iterations(1)
// per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29844
// n in [9, 15] (above nr == 8) with m == 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29860
// n in [9, 15] with m == 3 and cn_stride(11), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29877
// n in [9, 15] and m in [1, 3], for k = 1, 6, 11, 16; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29896
// n = 16 and 24 (multiples of nr == 8) with m == 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29912
// n = 16 and 24 with m == 3 and cn_stride(11), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29929
// n = 16 and 24 with m in [1, 3], for k = 1, 6, 11, 16; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29948
// Full 3x8 tile with ks(3); k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
29963
// ks(3) with every m in [1, 3] and n in [1, 8], for k = 1, 6, 11, 16;
// each configuration is run with iterations(1).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29983
// ks(3) with n in [9, 15] (above nr == 8) and m == 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30000
// ks(3) with n = 16 and 24 (multiples of nr == 8) and m == 3,
// for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30017
// cm_stride(11) with every m in [1, 3] and n in [1, 8], for k = 1, 6, 11, 16;
// each configuration is run with iterations(1).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30037
// Full 3x8 tile with ks(3) and a_offset(67), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30053
// Full 3x8 tile with ks(3) and a_offset(67); zero_index is swept over
// 0, 1, 2 for each k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(67)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30072
// Full 3x8 tile at k == 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
30085
// Full 3x8 tile at k == 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
30098
// Full 3x8 tile at k == 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
30111 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30112
30113
30114 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x2 tile (m == mr, n == nr) at k == 4.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
30126
// Full 4x2 tile at k == 4 with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
30139
// k == 4 with every n in [1, 2] and m in [1, 4]; each configuration is run
// with iterations(1).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30156
// k == 4 and n == 2 with m swept over [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(mc).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30171
// k == 4 and m == 4 with n swept over [1, 2]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30186
// Full 4x2 tile with k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30200
// k = 1, 2, 3 with every n in [1, 2] and m in [1, 4]; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30219
// Full 4x2 tile with k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30233
// k = 5, 6, 7 with every n in [1, 2] and m in [1, 4]; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30252
// Full 4x2 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30266
// k = 8, 12, ..., 40 with every n in [1, 2] and m in [1, 4]; iterations(1)
// per configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30285
// n in [3, 4), i.e. only n == 3 (above nr == 2), with m == 4,
// for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30301
// n in [3, 4), i.e. only n == 3, with m == 4 and cn_stride(5),
// for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30318
// n in [3, 4), i.e. only n == 3, with m in [1, 4], for k = 1, 6, 11, 16;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30337
// n = 4 and 6 (multiples of nr == 2) with m == 4, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30353
// n = 4 and 6 with m == 4 and cn_stride(5), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30370
// n = 4 and 6 with m in [1, 4], for k = 1, 6, 11, 16; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30389
// Full 4x2 tile with ks(3); k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30404
// ks(3) with every m in [1, 4] and n in [1, 2], for k = 1, 6, 11, 16;
// each configuration is run with iterations(1).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30424
// ks(3) with n in [3, 4), i.e. only n == 3, and m == 4, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30441
// ks(3) with n = 4 and 6 (multiples of nr == 2) and m == 4,
// for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30458
// cm_stride(5) with every m in [1, 4] and n in [1, 2], for k = 1, 6, 11, 16;
// each configuration is run with iterations(1).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30478
// Full 4x2 tile with ks(3) and a_offset(83), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30494
// Full 4x2 tile with ks(3) and a_offset(83); zero_index is swept over
// 0, 1, 2, 3 for each k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30513
// Full 4x2 tile at k == 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
30526
// Full 4x2 tile at k == 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
30539
// Full 4x2 tile at k == 4 with cm_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cm_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
30552 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30553
30554
30555 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x2 tile (m == mr, n == nr) at k == 4.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
30567
// Full 4x2 tile at k == 4 with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
30580
// k == 4 with every n in [1, 2] and m in [1, 4]; each configuration is run
// with iterations(1).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30597
// k == 4 and n == 2 with m swept over [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(mc).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30612
// k == 4 and m == 4 with n swept over [1, 2]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30627
// Full 4x2 tile with k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30641
// k = 1, 2, 3 with every n in [1, 2] and m in [1, 4]; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30660
// Full 4x2 tile with k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30674
// k = 5, 6, 7 with every n in [1, 2] and m in [1, 4]; iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30693
// Full 4x2 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
30707
// k = 8, 12, ..., 40 with every n in [1, 2] and m in [1, 4]; iterations(1)
// per configuration.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30726
// n = 3 (the only value in [3, 4)) with m=4, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30742
// n = 3 with m=4 and cn_stride set to 5, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(5);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30759
// n = 3, k = 1, 6, 11, 16, and every m in 1..4; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30778
// n = 4 and 6 with m=4, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30794
// n = 4 and 6 with m=4 and cn_stride set to 5, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(5);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30811
// n = 4 and 6, k = 1, 6, 11, 16, and every m in 1..4; iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30830
// Full tile (m=4, n=2) with ks set to 3, for k = 1, 6, 11, 16.
// NOTE(review): ks is presumably the kernel size, per the test name - confirm.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
30845
// ks set to 3 for k = 1, 6, 11, 16, every n in 1..2 and every m in 1..4;
// iterations set to 1 per combination.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30865
// n = 3 with m=4 and ks set to 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30882
// n = 4 and 6 with m=4 and ks set to 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30899
// cm_stride set to 5 for k = 1, 6, 11, 16, every n in 1..2 and every m in
// 1..4; iterations set to 1 per combination.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(5).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30919
// Full tile (m=4, n=2) with ks set to 3 and a_offset set to 83, for
// k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc).ks(3).a_offset(83);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
30935
// Same setup as the a_offset case (ks=3, a_offset=83), additionally setting
// zero_index to each value in 0..3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(2).k(kc).ks(3).a_offset(83).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30954
// Single run on a full tile (m=4, n=2, k=4) with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
}
30967
// Single run on a full tile (m=4, n=2, k=4) with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
}
30980
// Single run on a full tile (m=4, n=2, k=4) with cm_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).cm_stride(5);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
}
30993 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30994
30995
30996 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on a full tile (m=4, n=8) with k = 1.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31008
// Single run on a full tile (m=4, n=8, k=1) with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31021
// k = 1 for every n in 1..8 and every m in 1..4; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31038
// k = 1 and n = 8 for every m in 1..4; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31053
// k = 1 and m = 4 for every n in 1..8; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31068
// Full tile (m=4, n=8) for every k in 2..9.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31082
// Every k in 2..9, n in 1..8 and m in 1..4; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31101
// Every n in 9..15 with m=4, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31117
// Every n in 9..15 with m=4 and cn_stride set to 11, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31134
// Every n in 9..15, k = 1, 3, 5, and every m in 1..4; iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31153
// n = 16 and 24 with m=4, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31169
// n = 16 and 24 with m=4 and cn_stride set to 11, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31186
// n = 16 and 24, k = 1, 3, 5, and every m in 1..4; iterations set to 1.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31205
// Full tile (m=4, n=8) with ks set to 3, for k = 1, 3, 5.
// NOTE(review): ks is presumably the kernel size, per the test name - confirm.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31220
// ks set to 3 for k = 1, 3, 5, every n in 1..8 and every m in 1..4;
// iterations set to 1 per combination.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31240
// Every n in 9..15 with m=4 and ks set to 3, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31257
// n = 16 and 24 with m=4 and ks set to 3, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31274
// cm_stride set to 11 for k = 1, 3, 5, every n in 1..8 and every m in 1..4;
// iterations set to 1 per combination.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31294
// Full tile (m=4, n=8) with ks set to 3 and a_offset set to 23, for
// k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc).ks(3).a_offset(23);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31310
// Same setup as the a_offset case (ks=3, a_offset=23), additionally setting
// zero_index to each value in 0..3, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(kc).ks(3).a_offset(23).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31329
// Single run on a full tile (m=4, n=8, k=1) with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31342
// Single run on a full tile (m=4, n=8, k=1) with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31355
// Single run on a full tile (m=4, n=8, k=1) with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31368 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31369
31370
31371 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on a full tile (m=5, n=8) with k = 1.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31383
// Single run on a full tile (m=5, n=8, k=1) with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31396
// k = 1 for every n in 1..8 and every m in 1..5; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31413
// k = 1 and n = 8 for every m in 1..5; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31428
// k = 1 and m = 5 for every n in 1..8; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(nc).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31443
// Full tile (m=5, n=8) for every k in 2..9.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31457
// Every k in 2..9, n in 1..8 and m in 1..5; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31476
// Every n in 9..15 with m=5, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31492
// Every n in 9..15 with m=5 and cn_stride set to 11, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31509
// Every n in 9..15, k = 1, 3, 5, and every m in 1..5; iterations set to 1.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31528
// n = 16 and 24 with m=5, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31544
// n = 16 and 24 with m=5 and cn_stride set to 11, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31561
// n = 16 and 24, k = 1, 3, 5, and every m in 1..5; iterations set to 1.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31580
// Full tile (m=5, n=8) with ks set to 3, for k = 1, 3, 5.
// NOTE(review): ks is presumably the kernel size, per the test name - confirm.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(kc).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31595
// ks set to 3 for k = 1, 3, 5, every n in 1..8 and every m in 1..5;
// iterations set to 1 per combination.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31615
// Every n in 9..15 with m=5 and ks set to 3, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31632
// n = 16 and 24 with m=5 and ks set to 3, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31649
// cm_stride set to 11 for k = 1, 3, 5, every n in 1..8 and every m in 1..5;
// iterations set to 1 per combination.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31669
// Full tile (m=5, n=8) with ks set to 3 and a_offset set to 29, for
// k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(kc).ks(3).a_offset(29);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31685
// Same setup as the a_offset case (ks=3, a_offset=29), additionally setting
// zero_index to each value in 0..4, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 5; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(8).k(kc).ks(3).a_offset(29).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31704
// Single run on a full tile (m=5, n=8, k=1) with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31717
// Single run on a full tile (m=5, n=8, k=1) with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31730
// Single run on a full tile (m=5, n=8, k=1) with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31743 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31744
31745
31746 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on a full tile (m=6, n=8) with k = 4.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31758
// Single run on a full tile (m=6, n=8, k=4) with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
}
31771
// k = 4 for every n in 1..8 and every m in 1..6; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31788
// k = 4 and n = 8 for every m in 1..6; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31803
// k = 4 and m = 6 for every n in 1..8; iterations set to 1 per run.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(nc).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31818
// Full tile (m=6, n=8) for k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
31832
// k in {1, 2, 3} crossed with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31851
// m = 6, n = 8, sweeping k over 5, 6, 7.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31865
// k in {5, 6, 7} crossed with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31884
// m = 6, n = 8, sweeping k over 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31898
// k in {8, 12, ..., 40} crossed with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31917
// m = 6, n in [9, 15], k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31933
// m = 6, n in [9, 15], k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31950
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31969
// m = 6, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31985
// m = 6, n in {16, 24}, k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32002
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32021
// m = 6, n = 8, ks = 3, sweeping k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32036
// ks = 3 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32056
// m = 6, ks = 3, n in [9, 15], k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32073
// m = 6, ks = 3, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32090
// cm_stride = 11 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32110
// m = 6, n = 8, ks = 3, a_offset = 127, sweeping k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, a_offset) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32126
// ks = 3, a_offset = 127; k in {1, 6, 11, 16} crossed with zero_index in [0, 5].
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, zero) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32145
// m = 6, n = 8, k = 4, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32158
// m = 6, n = 8, k = 4, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32171
// m = 6, n = 8, k = 4, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32184 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32185
32186
32187 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// m == mr (6), n == nr (8), k = 1.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32199
// m = 6, n = 8, k = 1, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32212
// k = 1 for every (n, m) pair with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32229
// k = 1, n = 8, sweeping m over [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32244
// k = 1, m = 6, sweeping n over [1, 8]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32259
// m = 6, n = 8, sweeping k over [2, 9].
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32273
// k in [2, 9] crossed with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32292
// m = 6, n in [9, 15], k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32308
// m = 6, n in [9, 15], k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32325
// n in [9, 15], k in {1, 3, 5}, m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32344
// m = 6, n in {16, 24}, k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32360
// m = 6, n in {16, 24}, k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32377
// n in {16, 24}, k in {1, 3, 5}, m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32396
// m = 6, n = 8, ks = 3, sweeping k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32411
// ks = 3 with k in {1, 3, 5}, n in [1, 8], m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32431
// m = 6, ks = 3, n in [9, 15], k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32448
// m = 6, ks = 3, n in {16, 24}, k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32465
// cm_stride = 11 with k in {1, 3, 5}, n in [1, 8], m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32485
// m = 6, n = 8, ks = 3, a_offset = 37, sweeping k over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .a_offset(37)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32501
// ks = 3, a_offset = 37; k in {1, 3, 5} crossed with zero_index in [0, 5].
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(37)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32520
// m = 6, n = 8, k = 1, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32533
// m = 6, n = 8, k = 1, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32546
// m = 6, n = 8, k = 1, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32559 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32560
32561
32562 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// m == mr (6), n == nr (8), k = 4.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32574
// m = 6, n = 8, k = 4, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32587
// k = 4 for every (n, m) pair with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32604
// k = 4, n = 8, sweeping m over [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32619
// k = 4, m = 6, sweeping n over [1, 8]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32634
// m = 6, n = 8, sweeping k over 1, 2, 3.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32648
// k in {1, 2, 3} crossed with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32667
// m = 6, n = 8, sweeping k over 5, 6, 7.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32681
// k in {5, 6, 7} crossed with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32700
// m = 6, n = 8, sweeping k over 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32714
// k in {8, 12, ..., 40} crossed with n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32733
// m = 6, n in [9, 15], k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32749
// m = 6, n in [9, 15], k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32766
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32785
// m = 6, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32801
// m = 6, n in {16, 24}, k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32818
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32837
// m = 6, n = 8, ks = 3, sweeping k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32852
// ks = 3 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32872
// m = 6, ks = 3, n in [9, 15], k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32889
// m = 6, ks = 3, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32906
// cm_stride = 11 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32926
// m = 6, n = 8, ks = 3, a_offset = 127, sweeping k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, a_offset) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32942
// For k in {1, 6, 11, 16}, tries every zero_index in 0..5 with ks = 3 and
// a_offset = 127 at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 6; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(cur_k)
        .ks(3)
        .a_offset(127)
        .zero_index(cur_mz)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32961
// m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32974
// m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32987
// m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
}
33000 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33001
33002
33003 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 6, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
33015
// m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
33028
// Every (m, n) in 1..6 x 1..8 at k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(cur_m).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33045
// Varies m over 1..6 at n = 8, k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(cur_m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33060
// Varies n over 1..8 at m = 6, k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(cur_n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33075
// k in {1, 2, 3} at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33089
// k in {1, 2, 3} crossed with every (m, n) in 1..6 x 1..8, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33108
// k in {5, 6, 7} at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33122
// k in {5, 6, 7} crossed with every (m, n) in 1..6 x 1..8, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33141
// k = 8, 12, ..., 40 (step 4) at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33155
// k = 8, 12, ..., 40 crossed with every (m, n) in 1..6 x 1..8, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33174
// n in 9..15 crossed with k in {1, 6, 11, 16} at m = 6.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33190
// n in 9..15 crossed with k in {1, 6, 11, 16} at m = 6, cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33207
// n in 9..15, k in {1, 6, 11, 16}, m in 1..6; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33226
// n in {16, 24} crossed with k in {1, 6, 11, 16} at m = 6.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33242
// n in {16, 24} crossed with k in {1, 6, 11, 16} at m = 6, cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33259
// n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33278
// k in {1, 6, 11, 16} at m = 6, n = 8 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33293
// k in {1, 6, 11, 16} crossed with every (m, n) in 1..6 x 1..8, with ks = 3
// and one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33313
// n in 9..15 crossed with k in {1, 6, 11, 16} at m = 6 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33330
// n in {16, 24} crossed with k in {1, 6, 11, 16} at m = 6 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33347
// k in {1, 6, 11, 16} crossed with every (m, n) in 1..6 x 1..8, with
// cm_stride = 11 and one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33367
// k in {1, 6, 11, 16} at m = 6, n = 8 with ks = 3 and a_offset = 127.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33383
// For k in {1, 6, 11, 16}, tries every zero_index in 0..5 with ks = 3 and
// a_offset = 127 at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 6; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(cur_k)
        .ks(3)
        .a_offset(127)
        .zero_index(cur_mz)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33402
// m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
33415
// m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
33428
// m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
}
33441 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33442
33443
33444 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 6, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
33456
// m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
33469
// Every (m, n) in 1..6 x 1..8 at k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(cur_m).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33486
// Varies m over 1..6 at n = 8, k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(cur_m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33501
// Varies n over 1..8 at m = 6, k = 4, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(cur_n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33516
// k in {1, 2, 3} at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33530
// k in {1, 2, 3} crossed with every (m, n) in 1..6 x 1..8, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33549
// k in {5, 6, 7} at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33563
// k in {5, 6, 7} crossed with every (m, n) in 1..6 x 1..8, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33582
// k = 8, 12, ..., 40 (step 4) at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33596
// k = 8, 12, ..., 40 crossed with every (m, n) in 1..6 x 1..8, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33615
// n in 9..15 crossed with k in {1, 6, 11, 16} at m = 6.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33631
// n in 9..15 crossed with k in {1, 6, 11, 16} at m = 6, cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33648
// n in 9..15, k in {1, 6, 11, 16}, m in 1..6; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33667
// n in {16, 24} crossed with k in {1, 6, 11, 16} at m = 6.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33683
// n in {16, 24} crossed with k in {1, 6, 11, 16} at m = 6, cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33700
// n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33719
// k in {1, 6, 11, 16} at m = 6, n = 8 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33734
// k in {1, 6, 11, 16} crossed with every (m, n) in 1..6 x 1..8, with ks = 3
// and one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33754
// n in 9..15 crossed with k in {1, 6, 11, 16} at m = 6 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33771
// n in {16, 24} crossed with k in {1, 6, 11, 16} at m = 6 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33788
// k in {1, 6, 11, 16} crossed with every (m, n) in 1..6 x 1..8, with
// cm_stride = 11 and one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33808
// k in {1, 6, 11, 16} at m = 6, n = 8 with ks = 3 and a_offset = 127.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(cur_k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33824
// For k in {1, 6, 11, 16}, tries every zero_index in 0..5 with ks = 3 and
// a_offset = 127 at m = 6, n = 8.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 6; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(cur_k)
        .ks(3)
        .a_offset(127)
        .zero_index(cur_mz)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33843
// m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, qmin) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
33856
// m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, qmax) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
33869
// m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
}
33882 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33883
33884
33885 #if XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 1, n = 8, k = 1.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
33897
// m = 1, n = 8, k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
33910
// n in 1..8 with the (single-valued) m loop 1..1 at k = 1, one iteration
// per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33927
// m loop over 1..1 (a single value) at n = 8, k = 1, one iteration.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(cur_m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33942
// Varies n over 1..8 at m = 1, k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(cur_n).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33957
// k in 2..9 at m = 1, n = 8.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33971
// k in 2..9 crossed with n in 1..8 and the single-valued m loop 1..1, one
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33990
// n in 9..15 crossed with k in {1, 3, 5} at m = 1.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34006
// n in 9..15 crossed with k in {1, 3, 5} at m = 1, cn_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34023
// n in 9..15, k in {1, 3, 5}, and the single-valued m loop 1..1; one
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34042
// n in {16, 24} crossed with k in {1, 3, 5} at m = 1.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34058
// n in {16, 24} crossed with k in {1, 3, 5} at m = 1, cn_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34075
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  // n = 16 and 24, k = 1, 3, 5, m = 1 only; one iteration per combination.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34094
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel) {
  // Full 1x8 tile with ks(3), for k = 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34109
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel_subtile) {
  // k = 1, 3, 5, n = 1..8, m = 1 only, with ks(3); one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34129
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  // n = 9..15, k = 1, 3, 5, with ks(3).
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34146
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_small_kernel) {
  // n = 16 and 24, k = 1, 3, 5, with ks(3).
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34163
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  // k = 1, 3, 5, n = 1..8, m = 1 only, with cm_stride(11); one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34183
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, a_offset) {
  // Full 1x8 tile with ks(3) and a_offset(7), for k = 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34199
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, zero) {
  // k = 1, 3, 5 with ks(3), a_offset(7) and zero_index over 0 only.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(7)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34218
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  // Single 1x8 case at k = 1 with qmin(128).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
34231
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  // Single 1x8 case at k = 1 with qmax(128).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
34244
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  // Single 1x8 case at k = 1 with cm_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
34257 #endif // XNN_ARCH_WASMRELAXEDSIMD
34258
34259
34260 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  // Single full 1x8 tile at k = 4.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34272
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  // Single full 1x8 tile at k = 4 with cn_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34285
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  // k = 4 with n = 1..8 and m = 1 only; one iteration per combination.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34302
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  // k = 4, n = 8, m looped over 1 only; one iteration.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34317
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  // k = 4, m = 1, n = 1..8; one iteration per n.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34332
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  // Full 1x8 tile for k = 1, 2, 3.
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34346
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  // k = 1, 2, 3, n = 1..8, m = 1 only; one iteration per combination.
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34365
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  // Full 1x8 tile for k = 5, 6, 7.
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34379
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  // k = 5, 6, 7, n = 1..8, m = 1 only; one iteration per combination.
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34398
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  // Full 1x8 tile for k = 8, 12, ..., 40 (step 4).
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34412
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  // k = 8, 12, ..., 40, n = 1..8, m = 1 only; one iteration per combination.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34431
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  // n = 9..15, k = 1, 6, 11, 16 (step 5).
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34447
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  // n = 9..15, k = 1, 6, 11, 16, with cn_stride(11).
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34464
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  // n = 9..15, k = 1, 6, 11, 16, m = 1 only; one iteration per combination.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34483
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  // n = 16 and 24 (step 8), k = 1, 6, 11, 16.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34499
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  // n = 16 and 24, k = 1, 6, 11, 16, with cn_stride(11).
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34516
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  // n = 16 and 24, k = 1, 6, 11, 16, m = 1 only; one iteration per combination.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34535
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, small_kernel) {
  // Full 1x8 tile with ks(3), for k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34550
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, small_kernel_subtile) {
  // k = 1, 6, 11, 16, n = 1..8, m = 1 only, with ks(3); one iteration each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34570
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_small_kernel) {
  // n = 9..15, k = 1, 6, 11, 16, with ks(3).
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34587
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_div_8_small_kernel) {
  // n = 16 and 24, k = 1, 6, 11, 16, with ks(3).
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34604
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  // k = 1, 6, 11, 16, n = 1..8, m = 1 only, with cm_stride(11); one iteration each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34624
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, a_offset) {
  // Full 1x8 tile with ks(3) and a_offset(23), for k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34640
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, zero) {
  // k = 1, 6, 11, 16 with ks(3), a_offset(23) and zero_index over 0 only.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(23)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34659
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  // Single 1x8 case at k = 4 with qmin(128).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34672
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  // Single 1x8 case at k = 4 with qmax(128).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34685
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  // Single 1x8 case at k = 4 with cm_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34698 #endif // XNN_ARCH_WASMRELAXEDSIMD
34699
34700
34701 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4) {
  // Single full 1x8 tile at k = 4, tester configured with sr(4).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
34713
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, strided_cn) {
  // Single full 1x8 tile at k = 4 with cn_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
34726
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  // k = 4 with n = 1..8 and m = 1 only; one iteration per combination.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34743
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  // k = 4, n = 8, m looped over 1 only; one iteration.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34758
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  // k = 4, m = 1, n = 1..8; one iteration per n.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34773
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_lt_4) {
  // Full 1x8 tile for k = 1, 2, 3.
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34787
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  // k = 1, 2, 3, n = 1..8, m = 1 only; one iteration per combination.
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34806
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_gt_4) {
  // Full 1x8 tile for k = 5, 6, 7.
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34820
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  // k = 5, 6, 7, n = 1..8, m = 1 only; one iteration per combination.
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34839
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_div_4) {
  // Full 1x8 tile for k = 8, 12, ..., 40 (step 4).
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34853
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  // k = 8, 12, ..., 40, n = 1..8, m = 1 only; one iteration per combination.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34872
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, n_gt_8) {
  // n = 9..15, k = 1, 6, 11, 16 (step 5).
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34888
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  // n = 9..15, k = 1, 6, 11, 16, with cn_stride(11).
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34905
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  // n = 9..15, k = 1, 6, 11, 16, m = 1 only; one iteration per combination.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34924
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, n_div_8) {
  // n = 16 and 24 (step 8), k = 1, 6, 11, 16.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34940
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  // n = 16 and 24, k = 1, 6, 11, 16, with cn_stride(11).
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34957
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  // n = 16 and 24, k = 1, 6, 11, 16, m = 1 only; one iteration per combination.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34976
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, small_kernel) {
  // Full 1x8 tile with ks(3), for k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34991
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, small_kernel_subtile) {
  // k = 1, 6, 11, 16, n = 1..8, m = 1 only, with ks(3); one iteration each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35011
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, n_gt_8_small_kernel) {
  // n = 9..15, k = 1, 6, 11, 16, with ks(3).
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35028
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, n_div_8_small_kernel) {
  // n = 16 and 24, k = 1, 6, 11, 16, with ks(3).
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35045
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  // k = 1, 6, 11, 16, n = 1..8, m = 1 only, with cm_stride(11); one iteration each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35065
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, a_offset) {
  // Full 1x8 tile with ks(3) and a_offset(23), for k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35081
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, zero) {
  // k = 1, 6, 11, 16 with ks(3), a_offset(23) and zero_index over 0 only.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(23)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35100
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, qmin) {
  // Single 1x8 case at k = 4 with qmin(128).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
35113
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, qmax) {
  // Single 1x8 case at k = 4 with qmax(128).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
35126
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD, strided_cm) {
  // Single 1x8 case at k = 4 with cm_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
35139 #endif // XNN_ARCH_WASMRELAXEDSIMD
35140
35141
35142 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  // Single full 1x8 tile at k = 4, tester configured with sr(4).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
35154
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  // Single full 1x8 tile at k = 4 with cn_stride(11).
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
35167
// 1x8s4 wasmrelaxedsimd_fma kernel: k = 4, sweeping n over [1, 8] (outer) and
// m over [1, 1] (inner), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(4)
          .iterations(1)
          .Test(
              xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35184
// 1x8s4 wasmrelaxedsimd_fma kernel: n = 8, k = 4, sweeping m over [1, 1],
// with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(m_size).n(8).k(4)
        .iterations(1)
        .Test(
            xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35199
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, k = 4, sweeping n over [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_size).k(4)
        .iterations(1)
        .Test(
            xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35214
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, sweeping k over [1, 3].
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35228
// 1x8s4 wasmrelaxedsimd_fma kernel: sweeps k over [1, 3], n over [1, 8] and
// m over [1, 1] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35247
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, sweeping k over [5, 7].
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35261
// 1x8s4 wasmrelaxedsimd_fma kernel: sweeps k over [5, 7], n over [1, 8] and
// m over [1, 1] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35280
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, sweeping k over 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35294
// 1x8s4 wasmrelaxedsimd_fma kernel: sweeps k over 8, 12, ..., 40, n over [1, 8]
// and m over [1, 1] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35313
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, sweeping n over [9, 15] (outer) and
// k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35329
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, cn_stride = 11, sweeping n over
// [9, 15] (outer) and k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35346
// 1x8s4 wasmrelaxedsimd_fma kernel: sweeps n over [9, 15], k over 1, 6, 11, 16
// and m over [1, 1] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35365
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, sweeping n over 16, 24 (outer) and
// k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35381
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, cn_stride = 11, sweeping n over
// 16, 24 (outer) and k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35398
// 1x8s4 wasmrelaxedsimd_fma kernel: sweeps n over 16, 24, k over 1, 6, 11, 16
// and m over [1, 1] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35417
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, ks = 3, sweeping k over
// 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35432
// 1x8s4 wasmrelaxedsimd_fma kernel: ks = 3, sweeping k over 1, 6, 11, 16,
// n over [1, 8] and m over [1, 1] (outermost to innermost), with iterations(1)
// per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35452
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, ks = 3, sweeping n over [9, 15]
// (outer) and k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35469
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, ks = 3, sweeping n over 16, 24
// (outer) and k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35486
// 1x8s4 wasmrelaxedsimd_fma kernel: cm_stride = 11, sweeping k over 1, 6, 11, 16,
// n over [1, 8] and m over [1, 1] (outermost to innermost), with iterations(1)
// per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35506
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, ks = 3, a_offset = 23,
// sweeping k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(23)
        .Test(
            xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35522
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, ks = 3, a_offset = 23,
// sweeping k over 1, 6, 11, 16 (outer) and zero_index over [0, 0] (inner).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(
              xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35541
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, k = 4, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmin(128)
      .Test(
          xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
}
35554
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, k = 4, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmax(128)
      .Test(
          xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
}
35567
// 1x8s4 wasmrelaxedsimd_fma kernel: m = 1, n = 8, k = 4, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(
          xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
}
35580 #endif // XNN_ARCH_WASMRELAXEDSIMD
35581
35582
35583 #if XNN_ARCH_WASMRELAXEDSIMD
// 3x8 wasmrelaxedsimd_loadsplat kernel: single configuration m = 3, n = 8, k = 1.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
35595
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, n = 8, k = 1, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cn_stride(11)
      .Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
35608
// 3x8 wasmrelaxedsimd_loadsplat kernel: k = 1, sweeping n over [1, 8] (outer)
// and m over [1, 3] (inner), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35625
// 3x8 wasmrelaxedsimd_loadsplat kernel: n = 8, k = 1, sweeping m over [1, 3],
// with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(1)
        .iterations(1)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35640
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, k = 1, sweeping n over [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35655
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, n = 8, sweeping k over [2, 9].
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_size)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35669
// 3x8 wasmrelaxedsimd_loadsplat kernel: sweeps k over [2, 9], n over [1, 8] and
// m over [1, 3] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35688
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, sweeping n over [9, 15] (outer)
// and k over 1, 3, 5 (inner).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35704
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, cn_stride = 11, sweeping n over
// [9, 15] (outer) and k over 1, 3, 5 (inner).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35721
// 3x8 wasmrelaxedsimd_loadsplat kernel: sweeps n over [9, 15], k over 1, 3, 5
// and m over [1, 3] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35740
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, sweeping n over 16, 24 (outer)
// and k over 1, 3, 5 (inner).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35756
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, cn_stride = 11, sweeping n over
// 16, 24 (outer) and k over 1, 3, 5 (inner).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35773
// 3x8 wasmrelaxedsimd_loadsplat kernel: sweeps n over 16, 24, k over 1, 3, 5
// and m over [1, 3] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35792
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, n = 8, ks = 3, sweeping k over
// 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35807
// 3x8 wasmrelaxedsimd_loadsplat kernel: ks = 3, sweeping k over 1, 3, 5,
// n over [1, 8] and m over [1, 3] (outermost to innermost), with iterations(1)
// per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35827
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, ks = 3, sweeping n over [9, 15]
// (outer) and k over 1, 3, 5 (inner).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35844
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, ks = 3, sweeping n over 16, 24
// (outer) and k over 1, 3, 5 (inner).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35861
// 3x8 wasmrelaxedsimd_loadsplat kernel: cm_stride = 11, sweeping k over 1, 3, 5,
// n over [1, 8] and m over [1, 3] (outermost to innermost), with iterations(1)
// per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35881
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, n = 8, ks = 3, a_offset = 17,
// sweeping k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_size)
        .ks(3)
        .a_offset(17)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
35897
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, n = 8, ks = 3, a_offset = 17,
// sweeping k over 1, 3, 5 (outer) and zero_index over [0, 2] (inner).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(8).k(k_size)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35916
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, n = 8, k = 1, with qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .qmin(128)
      .Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
35929
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, n = 8, k = 1, with qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .qmax(128)
      .Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
35942
// 3x8 wasmrelaxedsimd_loadsplat kernel: m = 3, n = 8, k = 1, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cm_stride(11)
      .Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
35955 #endif // XNN_ARCH_WASMRELAXEDSIMD
35956
35957
35958 #if XNN_ARCH_WASMRELAXEDSIMD
// 3x8s4 wasmrelaxedsimd kernel: single configuration m = 3, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
          xnn_init_f32_minmax_wasmsimd_params);
}
35970
// 3x8s4 wasmrelaxedsimd kernel: m = 3, n = 8, k = 4, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cn_stride(11)
      .Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
          xnn_init_f32_minmax_wasmsimd_params);
}
35983
// 3x8s4 wasmrelaxedsimd kernel: k = 4, sweeping n over [1, 8] (outer) and
// m over [1, 3] (inner), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(4)
          .iterations(1)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36000
// 3x8s4 wasmrelaxedsimd kernel: n = 8, k = 4, sweeping m over [1, 3],
// with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(m_size).n(8).k(4)
        .iterations(1)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
36015
// 3x8s4 wasmrelaxedsimd kernel: m = 3, k = 4, sweeping n over [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n_size).k(4)
        .iterations(1)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
36030
// 3x8s4 wasmrelaxedsimd kernel: m = 3, n = 8, sweeping k over [1, 3].
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_size)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
36044
// 3x8s4 wasmrelaxedsimd kernel: sweeps k over [1, 3], n over [1, 8] and
// m over [1, 3] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36063
// 3x8s4 wasmrelaxedsimd kernel: m = 3, n = 8, sweeping k over [5, 7].
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_size)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
36077
// 3x8s4 wasmrelaxedsimd kernel: sweeps k over [5, 7], n over [1, 8] and
// m over [1, 3] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36096
// 3x8s4 wasmrelaxedsimd kernel: m = 3, n = 8, sweeping k over 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_size)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
36110
// 3x8s4 wasmrelaxedsimd kernel: sweeps k over 8, 12, ..., 40, n over [1, 8]
// and m over [1, 3] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36129
// 3x8s4 wasmrelaxedsimd kernel: m = 3, sweeping n over [9, 15] (outer) and
// k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36145
// 3x8s4 wasmrelaxedsimd kernel: m = 3, cn_stride = 11, sweeping n over [9, 15]
// (outer) and k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36162
// 3x8s4 wasmrelaxedsimd kernel: sweeps n over [9, 15], k over 1, 6, 11, 16
// and m over [1, 3] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36181
// 3x8s4 wasmrelaxedsimd kernel: m = 3, sweeping n over 16, 24 (outer) and
// k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36197
// 3x8s4 wasmrelaxedsimd kernel: m = 3, cn_stride = 11, sweeping n over 16, 24
// (outer) and k over 1, 6, 11, 16 (inner).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36214
// 3x8s4 wasmrelaxedsimd kernel: sweeps n over 16, 24, k over 1, 6, 11, 16
// and m over [1, 3] (outermost to innermost), with iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36233
// 3x8s4 wasmrelaxedsimd kernel: m = 3, n = 8, ks = 3, sweeping k over
// 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, small_kernel) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
36248
// 3x8s4 wasmrelaxedsimd kernel: ks = 3, sweeping k over 1, 6, 11, 16,
// n over [1, 8] and m over [1, 3] (outermost to innermost), with iterations(1)
// per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36268
// 3x8s4 wasmrelaxedsimd: n in 9..15, k in {1, 6, 11, 16}, m=3, with ks(3).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36285
// 3x8s4 wasmrelaxedsimd: n in {16, 24}, k in {1, 6, 11, 16}, m=3, with ks(3).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36302
// 3x8s4 wasmrelaxedsimd: k in {1, 6, 11, 16}, n in 1..8, m in 1..3, with cm_stride(11), one iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 3; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36322
// 3x8s4 wasmrelaxedsimd: full 3x8 tile, k in {1, 6, 11, 16}, with ks(3) and a_offset(67).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(kc).ks(3).a_offset(67);
    tester.Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36338
// 3x8s4 wasmrelaxedsimd: k in {1, 6, 11, 16}, zero_index in 0..2, with ks(3) and a_offset(67).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(8).k(kc).ks(3).a_offset(67).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36357
// 3x8s4 wasmrelaxedsimd: full 3x8 tile, k=4, with qmin(128).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36370
// 3x8s4 wasmrelaxedsimd: full 3x8 tile, k=4, with qmax(128).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36383
// 3x8s4 wasmrelaxedsimd: full 3x8 tile, k=4, with cm_stride(11).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36396 #endif // XNN_ARCH_WASMRELAXEDSIMD
36397
36398
36399 #if XNN_ARCH_WASMRELAXEDSIMD
// 4x2c4 wasmrelaxedsimd: full 4x2 tile with k=4.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36411
// 4x2c4 wasmrelaxedsimd: full 4x2 tile, k=4, with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).cn_stride(5);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36424
// 4x2c4 wasmrelaxedsimd: k=4, n in 1..2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 2; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(mc).n(nc).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36441
// 4x2c4 wasmrelaxedsimd: k=4, n=2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(mc).n(2).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36456
// 4x2c4 wasmrelaxedsimd: k=4, m=4, n in 1..2, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; nc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(nc).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36471
// 4x2c4 wasmrelaxedsimd: full 4x2 tile with k in 1..3.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_lt_4) {
  for (size_t kc = 1; kc < 4; kc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36485
// 4x2c4 wasmrelaxedsimd: k in 1..3, n in 1..2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; kc++) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36504
// 4x2c4 wasmrelaxedsimd: full 4x2 tile with k in 5..7.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_gt_4) {
  for (size_t kc = 5; kc < 8; kc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36518
// 4x2c4 wasmrelaxedsimd: k in 5..7, n in 1..2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36537
// 4x2c4 wasmrelaxedsimd: full 4x2 tile with k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36551
// 4x2c4 wasmrelaxedsimd: k = 8, 12, ..., 40, n in 1..2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36570
// 4x2c4 wasmrelaxedsimd: n=3 (the only value in [3, 4)), k in {1, 6, 11, 16}, m=4.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36586
// 4x2c4 wasmrelaxedsimd: n=3 (the only value in [3, 4)), k in {1, 6, 11, 16}, m=4, with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(5);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36603
// 4x2c4 wasmrelaxedsimd: n=3 (the only value in [3, 4)), k in {1, 6, 11, 16}, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36622
// 4x2c4 wasmrelaxedsimd: n in {4, 6}, k in {1, 6, 11, 16}, m=4.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36638
// 4x2c4 wasmrelaxedsimd: n in {4, 6}, k in {1, 6, 11, 16}, m=4, with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(5);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36655
// 4x2c4 wasmrelaxedsimd: n in {4, 6}, k in {1, 6, 11, 16}, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36674
// 4x2c4 wasmrelaxedsimd: full 4x2 tile, k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36689
// 4x2c4 wasmrelaxedsimd: k in {1, 6, 11, 16}, n in 1..2, m in 1..4, with ks(3), one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36709
// 4x2c4 wasmrelaxedsimd: n=3 (the only value in [3, 4)), k in {1, 6, 11, 16}, m=4, with ks(3).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36726
// 4x2c4 wasmrelaxedsimd: n in {4, 6}, k in {1, 6, 11, 16}, m=4, with ks(3).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36743
// 4x2c4 wasmrelaxedsimd: k in {1, 6, 11, 16}, n in 1..2, m in 1..4, with cm_stride(5), one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(5).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36763
// 4x2c4 wasmrelaxedsimd: full 4x2 tile, k in {1, 6, 11, 16}, with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc).ks(3).a_offset(83);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36779
// 4x2c4 wasmrelaxedsimd: k in {1, 6, 11, 16}, zero_index in 0..3, with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; zero_idx++) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(2).k(kc).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36798
// 4x2c4 wasmrelaxedsimd: full 4x2 tile, k=4, with qmin(128).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36811
// 4x2c4 wasmrelaxedsimd: full 4x2 tile, k=4, with qmax(128).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36824
// 4x2c4 wasmrelaxedsimd: full 4x2 tile, k=4, with cm_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).cm_stride(5);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36837 #endif // XNN_ARCH_WASMRELAXEDSIMD
36838
36839
36840 #if XNN_ARCH_WASMRELAXEDSIMD
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile with k=4.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
36852
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile, k=4, with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).cn_stride(5);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
36865
// 4x2c4 wasmrelaxedsimd_fma: k=4, n in 1..2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 2; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(mc).n(nc).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36882
// 4x2c4 wasmrelaxedsimd_fma: k=4, n=2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(mc).n(2).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36897
// 4x2c4 wasmrelaxedsimd_fma: k=4, m=4, n in 1..2, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; nc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(nc).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36912
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile with k in 1..3.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kc = 1; kc < 4; kc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36926
// 4x2c4 wasmrelaxedsimd_fma: k in 1..3, n in 1..2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; kc++) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36945
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile with k in 5..7.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kc = 5; kc < 8; kc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36959
// 4x2c4 wasmrelaxedsimd_fma: k in 5..7, n in 1..2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36978
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile with k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36992
// 4x2c4 wasmrelaxedsimd_fma: k = 8, 12, ..., 40, n in 1..2, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37011
// 4x2c4 wasmrelaxedsimd_fma: n=3 (the only value in [3, 4)), k in {1, 6, 11, 16}, m=4.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37027
// 4x2c4 wasmrelaxedsimd_fma: n=3 (the only value in [3, 4)), k in {1, 6, 11, 16}, m=4, with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(5);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37044
// 4x2c4 wasmrelaxedsimd_fma: n=3 (the only value in [3, 4)), k in {1, 6, 11, 16}, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37063
// 4x2c4 wasmrelaxedsimd_fma: n in {4, 6}, k in {1, 6, 11, 16}, m=4.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37079
// 4x2c4 wasmrelaxedsimd_fma: n in {4, 6}, k in {1, 6, 11, 16}, m=4, with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(5);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37096
// 4x2c4 wasmrelaxedsimd_fma: n in {4, 6}, k in {1, 6, 11, 16}, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37115
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile, k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
37130
// 4x2c4 wasmrelaxedsimd_fma: k in {1, 6, 11, 16}, n in 1..2, m in 1..4, with ks(3), one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37150
// 4x2c4 wasmrelaxedsimd_fma: n=3 (the only value in [3, 4)), k in {1, 6, 11, 16}, m=4, with ks(3).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37167
// 4x2c4 wasmrelaxedsimd_fma: n in {4, 6}, k in {1, 6, 11, 16}, m=4, with ks(3).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37184
// 4x2c4 wasmrelaxedsimd_fma: k in {1, 6, 11, 16}, n in 1..2, m in 1..4, with cm_stride(5), one iteration each.
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(5).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37204
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile, k in {1, 6, 11, 16}, with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc).ks(3).a_offset(83);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
37220
// 4x2c4 wasmrelaxedsimd_fma: k in {1, 6, 11, 16}, zero_index in 0..3, with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; zero_idx++) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(2).k(kc).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37239
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile, k=4, with qmin(128).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
37252
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile, k=4, with qmax(128).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
37265
// 4x2c4 wasmrelaxedsimd_fma: full 4x2 tile, k=4, with cm_stride(5).
TEST(F32_IGEMM_MINMAX_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).cm_stride(5);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
37278 #endif // XNN_ARCH_WASMRELAXEDSIMD
37279
37280
37281 #if XNN_ARCH_WASMRELAXEDSIMD
// 4x8 wasmrelaxedsimd_loadsplat: full 4x8 tile with k=1.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
37293
// 4x8 wasmrelaxedsimd_loadsplat: full 4x8 tile, k=1, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
37306
// 4x8 wasmrelaxedsimd_loadsplat: k=1, n in 1..8, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37323
// 4x8 wasmrelaxedsimd_loadsplat: k=1, n=8, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
37338
// 4x8 wasmrelaxedsimd_loadsplat: k=1, m=4, n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; nc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
37353
// 4x8 wasmrelaxedsimd_loadsplat: full 4x8 tile with k in 2..9.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; kc++) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
37367
// 4x8 wasmrelaxedsimd_loadsplat: k in 2..9, n in 1..8, m in 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37386
// Runs the 4x8 loadsplat kernel with m = 4 for n = 9 .. 15 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37402
// Runs the 4x8 loadsplat kernel with m = 4 and cn_stride(11) for n = 9 .. 15
// and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37419
// Runs the 4x8 loadsplat kernel for n = 9 .. 15, k in {1, 3, 5} and m in
// [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37438
// Runs the 4x8 loadsplat kernel with m = 4 for n in {16, 24} and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37454
// Runs the 4x8 loadsplat kernel with m = 4 and cn_stride(11) for
// n in {16, 24} and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37471
// Runs the 4x8 loadsplat kernel for n in {16, 24}, k in {1, 3, 5} and m in
// [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37490
// Runs the 4x8 loadsplat kernel on the full 4x8 tile with ks(3) for
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37505
// Runs the 4x8 loadsplat kernel with ks(3) for k in {1, 3, 5} over every
// (m, n) pair with n in [1, 8] and m in [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37525
// Runs the 4x8 loadsplat kernel with m = 4 and ks(3) for n = 9 .. 15 and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37542
// Runs the 4x8 loadsplat kernel with m = 4 and ks(3) for n in {16, 24} and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37559
// Runs the 4x8 loadsplat kernel with cm_stride(11) for k in {1, 3, 5} over
// every (m, n) pair with n in [1, 8] and m in [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37579
// Runs the 4x8 loadsplat kernel on the full 4x8 tile with ks(3) and
// a_offset(23) for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37595
// Runs the 4x8 loadsplat kernel on the full 4x8 tile with ks(3) and
// a_offset(23) for k in {1, 3, 5}, passing each value 0 .. 3 to zero_index.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 4; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(cur_k)
          .ks(3)
          .a_offset(23)
          .zero_index(cur_mz)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37614
// Runs the 4x8 loadsplat kernel once on the full 4x8 tile at k = 1 with
// qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
37627
// Runs the 4x8 loadsplat kernel once on the full 4x8 tile at k = 1 with
// qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
37640
// Runs the 4x8 loadsplat kernel once on the full 4x8 tile at k = 1 with
// cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
37653 #endif // XNN_ARCH_WASMRELAXEDSIMD
37654
37655
37656 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 4x8s4 kernel once on the full 4x8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
}
37668
// Runs the 4x8s4 kernel once on the full 4x8 tile at k = 4 with
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
}
37681
// Runs the 4x8s4 kernel at k = 4 for every (m, n) pair, with n in [1, 8] as
// the outer loop and m in [1, 4] as the inner loop; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37698
// Runs the 4x8s4 kernel at k = 4 and n = 8 while m steps through [1, 4];
// iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37713
// Runs the 4x8s4 kernel at k = 4 and m = 4 while n steps through [1, 8];
// iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37728
// Runs the 4x8s4 kernel on the full 4x8 tile for k = 1 .. 3.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37742
// Runs the 4x8s4 kernel for k = 1 .. 3 over every (m, n) pair with n in
// [1, 8] and m in [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37761
// Runs the 4x8s4 kernel on the full 4x8 tile for k = 5 .. 7.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37775
// Runs the 4x8s4 kernel for k = 5 .. 7 over every (m, n) pair with n in
// [1, 8] and m in [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37794
// Runs the 4x8s4 kernel on the full 4x8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37808
// Runs the 4x8s4 kernel for k = 8, 12, ..., 40 over every (m, n) pair with
// n in [1, 8] and m in [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37827
// Runs the 4x8s4 kernel with m = 4 for n = 9 .. 15 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37843
// Runs the 4x8s4 kernel with m = 4 and cn_stride(11) for n = 9 .. 15 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37860
// Runs the 4x8s4 kernel for n = 9 .. 15, k in {1, 6, 11, 16} and m in
// [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37879
// Runs the 4x8s4 kernel with m = 4 for n in {16, 24} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37895
// Runs the 4x8s4 kernel with m = 4 and cn_stride(11) for n in {16, 24} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37912
// Runs the 4x8s4 kernel for n in {16, 24}, k in {1, 6, 11, 16} and m in
// [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37931
// Runs the 4x8s4 kernel on the full 4x8 tile with ks(3) for
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37946
// Runs the 4x8s4 kernel with ks(3) for k in {1, 6, 11, 16} over every
// (m, n) pair with n in [1, 8] and m in [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37966
// Runs the 4x8s4 kernel with m = 4 and ks(3) for n = 9 .. 15 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37983
// Runs the 4x8s4 kernel with m = 4 and ks(3) for n in {16, 24} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38000
// Runs the 4x8s4 kernel with cm_stride(11) for k in {1, 6, 11, 16} over
// every (m, n) pair with n in [1, 8] and m in [1, 4]; iterations(1).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38020
// Runs the 4x8s4 kernel on the full 4x8 tile with ks(3) and a_offset(83)
// for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
38036
// Runs the 4x8s4 kernel on the full 4x8 tile with ks(3) and a_offset(83)
// for k in {1, 6, 11, 16}, passing each value 0 .. 3 to zero_index.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 4; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(8).k(cur_k)
          .ks(3)
          .a_offset(83)
          .zero_index(cur_mz)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38055
// Runs the 4x8s4 kernel once on the full 4x8 tile at k = 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
}
38068
// Runs the 4x8s4 kernel once on the full 4x8 tile at k = 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
}
38081
// Runs the 4x8s4 kernel once on the full 4x8 tile at k = 4 with
// cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
}
38094 #endif // XNN_ARCH_WASMRELAXEDSIMD
38095
38096
38097 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 5x8 loadsplat kernel once on the full 5x8 tile at k = 1.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
38109
// Runs the 5x8 loadsplat kernel once on the full 5x8 tile at k = 1 with
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
38122
// Runs the 5x8 loadsplat kernel at k = 1 for every (m, n) pair, with n in
// [1, 8] as the outer loop and m in [1, 5] as the inner loop; iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38139
// Runs the 5x8 loadsplat kernel at k = 1 and n = 8 while m steps through
// [1, 5]; iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
38154
// Runs the 5x8 loadsplat kernel at k = 1 and m = 5 while n steps through
// [1, 8]; iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
38169
// Runs the 5x8 loadsplat kernel on the full 5x8 tile for k = 2 .. 9.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
38183
// Runs the 5x8 loadsplat kernel for k = 2 .. 9 over every (m, n) pair with
// n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38202
// Runs the 5x8 loadsplat kernel with m = 5 for n = 9 .. 15 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38218
// Runs the 5x8 loadsplat kernel with m = 5 and cn_stride(11) for n = 9 .. 15
// and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38235
// Runs the 5x8 loadsplat kernel for n = 9 .. 15, k in {1, 3, 5} and m in
// [1, 5]; iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38254
// Runs the 5x8 loadsplat kernel with m = 5 for n in {16, 24} and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38270
// Runs the 5x8 loadsplat kernel with m = 5 and cn_stride(11) for
// n in {16, 24} and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38287
// Runs the 5x8 loadsplat kernel for n in {16, 24}, k in {1, 3, 5} and m in
// [1, 5]; iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38306
// Runs the 5x8 loadsplat kernel on the full 5x8 tile with ks(3) for
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
38321
// Runs the 5x8 loadsplat kernel with ks(3) for k in {1, 3, 5} over every
// (m, n) pair with n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38341
// Runs the 5x8 loadsplat kernel with m = 5 and ks(3) for n = 9 .. 15 and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38358
// Runs the 5x8 loadsplat kernel with m = 5 and ks(3) for n in {16, 24} and
// k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38375
// Runs the 5x8 loadsplat kernel with cm_stride(11) for k in {1, 3, 5} over
// every (m, n) pair with n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38395
// Runs the 5x8 loadsplat kernel on the full 5x8 tile with ks(3) and
// a_offset(29) for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(cur_k)
        .ks(3)
        .a_offset(29)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
38411
// Runs the 5x8 loadsplat kernel on the full 5x8 tile with ks(3) and
// a_offset(29) for k in {1, 3, 5}, passing each value 0 .. 4 to zero_index.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 5; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(cur_k)
          .ks(3)
          .a_offset(29)
          .zero_index(cur_mz)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38430
// Runs the 5x8 loadsplat kernel once on the full 5x8 tile at k = 1 with
// qmin(128).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
38443
// Runs the 5x8 loadsplat kernel once on the full 5x8 tile at k = 1 with
// qmax(128).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
38456
// Runs the 5x8 loadsplat kernel once on the full 5x8 tile at k = 1 with
// cm_stride(11).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
38469 #endif // XNN_ARCH_WASMRELAXEDSIMD
38470
38471
38472 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  // Single run at m = 5, n = 8 (matching mr and nr) with k = 4.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
}
38484
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  // Single run at m = 5, n = 8, k = 4 with cn_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
}
38497
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  // Every (m, n) pair with 1 <= m <= 5 and 1 <= n <= 8 at k = 4, one iteration per pair.
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38514
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  // m ranges over 1..5 while n = 8 and k = 4 stay fixed; one iteration per m.
  for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(cur_m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38529
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  // n ranges over 1..8 while m = 5 and k = 4 stay fixed; one iteration per n.
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(cur_n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38544
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  // k ranges over 1..3 with m = 5 and n = 8.
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38558
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  // k in 1..3 crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38577
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  // k ranges over 5..7 with m = 5 and n = 8.
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38591
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  // k in 5..7 crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38610
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  // k steps through the multiples of 4 from 8 to 40 with m = 5 and n = 8.
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38624
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  // Multiples of 4 from 8 to 40 for k, crossed with every (m, n),
  // 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38643
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  // n ranges over 9..15 and k over {1, 6, 11, 16}, with m = 5.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38659
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  // n ranges over 9..15 and k over {1, 6, 11, 16}, with m = 5 and cn_stride = 11.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38676
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  // n in 9..15, k in {1, 6, 11, 16} and m in 1..5; one iteration per combination.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38695
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  // n takes the values 16 and 24, k the values {1, 6, 11, 16}, with m = 5.
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38711
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  // n takes the values 16 and 24, k the values {1, 6, 11, 16}, with m = 5 and cn_stride = 11.
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38728
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16} and m in 1..5; one iteration per combination.
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38747
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, small_kernel) {
  // k takes the values {1, 6, 11, 16} with m = 5, n = 8 and ks = 3.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38762
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, small_kernel_subtile) {
  // k in {1, 6, 11, 16} crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8,
  // using ks = 3 and one iteration per combination.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38782
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_small_kernel) {
  // n ranges over 9..15 and k over {1, 6, 11, 16}, with m = 5 and ks = 3.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38799
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_div_8_small_kernel) {
  // n takes the values 16 and 24, k the values {1, 6, 11, 16}, with m = 5 and ks = 3.
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38816
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  // k in {1, 6, 11, 16} crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8,
  // using cm_stride = 11 and one iteration per combination.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
38836
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, a_offset) {
  // k takes the values {1, 6, 11, 16}; each run uses m = 5, n = 8, ks = 3 and a_offset = 103.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k);
    tester.ks(3).a_offset(103);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38852
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, zero) {
  // For k in {1, 6, 11, 16} and every zero_index in [0, 5):
  // m = 5, n = 8, ks = 3, a_offset = 103.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(8).k(cur_k);
      tester.ks(3).a_offset(103).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38871
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  // Single run at m = 5, n = 8, k = 4 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
}
38884
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  // Single run at m = 5, n = 8, k = 4 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
}
38897
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  // Single run at m = 5, n = 8, k = 4 with cm_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
}
38910 #endif // XNN_ARCH_WASMRELAXEDSIMD
38911
38912
38913 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4) {
  // Single run at m = 5, n = 8 (matching mr and nr) with k = 4; sr is 4 for this kernel.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
}
38925
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, strided_cn) {
  // Single run at m = 5, n = 8, k = 4 with cn_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
}
38938
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  // Every (m, n) pair with 1 <= m <= 5 and 1 <= n <= 8 at k = 4, one iteration per pair.
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(cur_m).n(cur_n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
38955
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  // m ranges over 1..5 while n = 8 and k = 4 stay fixed; one iteration per m.
  for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(cur_m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38970
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  // n ranges over 1..8 while m = 5 and k = 4 stay fixed; one iteration per n.
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(cur_n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38985
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_lt_4) {
  // k ranges over 1..3 with m = 5 and n = 8.
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
38999
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  // k in 1..3 crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39018
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_gt_4) {
  // k ranges over 5..7 with m = 5 and n = 8.
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39032
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  // k in 5..7 crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39051
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_div_4) {
  // k steps through the multiples of 4 from 8 to 40 with m = 5 and n = 8.
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39065
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  // Multiples of 4 from 8 to 40 for k, crossed with every (m, n),
  // 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39084
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, n_gt_8) {
  // n ranges over 9..15 and k over {1, 6, 11, 16}, with m = 5.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39100
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  // n ranges over 9..15 and k over {1, 6, 11, 16}, with m = 5 and cn_stride = 11.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39117
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  // n in 9..15, k in {1, 6, 11, 16} and m in 1..5; one iteration per combination.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39136
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, n_div_8) {
  // n takes the values 16 and 24, k the values {1, 6, 11, 16}, with m = 5.
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39152
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  // n takes the values 16 and 24, k the values {1, 6, 11, 16}, with m = 5 and cn_stride = 11.
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39169
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16} and m in 1..5; one iteration per combination.
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39188
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, small_kernel) {
  // k takes the values {1, 6, 11, 16} with m = 5, n = 8 and ks = 3.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(cur_k);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39203
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, small_kernel_subtile) {
  // k in {1, 6, 11, 16} crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8,
  // using ks = 3 and one iteration per combination.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39223
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, n_gt_8_small_kernel) {
  // n ranges over 9..15 and k over {1, 6, 11, 16}, with m = 5 and ks = 3.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39240
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, n_div_8_small_kernel) {
  // n takes the values 16 and 24, k the values {1, 6, 11, 16}, with m = 5 and ks = 3.
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39257
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  // k in {1, 6, 11, 16} crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8,
  // using cm_stride = 11 and one iteration per combination.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39277
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, a_offset) {
  // k takes the values {1, 6, 11, 16}; each run uses m = 5, n = 8, ks = 3 and a_offset = 103.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(cur_k);
    tester.ks(3).a_offset(103);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39293
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, zero) {
  // For k in {1, 6, 11, 16} and every zero_index in [0, 5):
  // m = 5, n = 8, ks = 3, a_offset = 103.
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(8).k(cur_k);
      tester.ks(3).a_offset(103).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39312
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, qmin) {
  // Single run at m = 5, n = 8, k = 4 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
}
39325
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, qmax) {
  // Single run at m = 5, n = 8, k = 4 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
}
39338
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD, strided_cm) {
  // Single run at m = 5, n = 8, k = 4 with cm_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
}
39351 #endif // XNN_ARCH_WASMRELAXEDSIMD
39352
39353
39354 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  // Single run at m = 5, n = 8 (matching mr and nr) with k = 4; sr is 4 for this kernel.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
}
39366
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  // Single run at m = 5, n = 8, k = 4 with cn_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
}
39379
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  // Every (m, n) pair with 1 <= m <= 5 and 1 <= n <= 8 at k = 4, one iteration per pair.
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(cur_m).n(cur_n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39396
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  // m ranges over 1..5 while n = 8 and k = 4 stay fixed; one iteration per m.
  for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(cur_m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39411
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  // n ranges over 1..8 while m = 5 and k = 4 stay fixed; one iteration per n.
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(cur_n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39426
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  // k ranges over 1..3 with m = 5 and n = 8.
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39440
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  // k in 1..3 crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39459
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  // k ranges over 5..7 with m = 5 and n = 8.
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39473
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  // k in 5..7 crossed with every (m, n), 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39492
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  // k steps through the multiples of 4 from 8 to 40 with m = 5 and n = 8.
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
  }
}
39506
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  // Multiples of 4 from 8 to 40 for k, crossed with every (m, n),
  // 1 <= m <= 5 and 1 <= n <= 8; one iteration each.
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39525
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  // n ranges over 9..15 and k over {1, 6, 11, 16}, with m = 5.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39541
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  // n ranges over 9..15 and k over {1, 6, 11, 16}, with m = 5 and cn_stride = 11.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39558
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  // n in 9..15, k in {1, 6, 11, 16} and m in 1..5; one iteration per combination.
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                    xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39577
// Uses n = 16 and n = 24 (multiples of nr = 8) with k in 1, 6, 11, 16 and m = 5.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39593
// Same sweep as n_div_8 (n = 16, 24; k in 1, 6, 11, 16; m = 5), with cn_stride
// set to 11.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39610
// For n = 16, 24 and k in 1, 6, 11, 16, tries every m in [1, 5] with a single
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39629
// Runs m = 5, n = 8 with ks set to 3 for k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
39644
// With ks set to 3, sweeps k in 1, 6, 11, 16, n in [1, 8] and m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39664
// With ks set to 3 and m = 5, sweeps n over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39681
// With ks set to 3 and m = 5, uses n = 16, 24 and k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39698
// With cm_stride set to 11, sweeps k in 1, 6, 11, 16, n in [1, 8] and
// m in [1, 5], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39718
// Runs m = 5, n = 8, ks = 3 with a_offset set to 103 for k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
39734
// Same setup as a_offset (m = 5, n = 8, ks = 3, a_offset = 103), additionally
// passing each value 0..4 to zero_index for every k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(8).k(kc)
          .ks(3)
          .a_offset(103)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39753
// Single configuration: m = 5, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
39766
// Single configuration: m = 5, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
39779
// Single configuration: m = 5, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
39792 #endif // XNN_ARCH_WASMRELAXEDSIMD
39793
39794
39795 #if XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 6, n = 8 (equal to mr and nr) with k = 1.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
39807
// Single configuration: m = 6, n = 8, k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
39820
// With k = 1, tries every n in [1, 8] and m in [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39837
// With k = 1 and n = 8, tries every m in [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
39852
// With k = 1 and m = 6, tries every n in [1, 8], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
39867
// Sweeps k over 2..9 with m = 6 and n = 8.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
39881
// Sweeps k over 2..9 and, for each k, every n in [1, 8] and m in [1, 6],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39900
// Sweeps n over 9..15 (above nr = 8) and k over 1, 3, 5 with m fixed at 6.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39916
// Same sweep as n_gt_8 (n in 9..15, k in 1, 3, 5, m = 6), with cn_stride
// set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39933
// For n in 9..15 and k in 1, 3, 5, tries every m in [1, 6] with a single
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
39952
// Uses n = 16 and n = 24 (multiples of nr = 8) with k in 1, 3, 5 and m = 6.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39968
// Same sweep as n_div_8 (n = 16, 24; k in 1, 3, 5; m = 6), with cn_stride
// set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
39985
// For n = 16, 24 and k in 1, 3, 5, tries every m in [1, 6] with a single
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40004
// Runs m = 6, n = 8 with ks set to 3 for k in 1, 3, 5.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40019
// With ks set to 3, sweeps k in 1, 3, 5, n in [1, 8] and m in [1, 6],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40039
// With ks set to 3 and m = 6, sweeps n over 9..15 and k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40056
// With ks set to 3 and m = 6, uses n = 16, 24 and k in 1, 3, 5.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40073
// With cm_stride set to 11, sweeps k in 1, 3, 5, n in [1, 8] and m in [1, 6],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40093
// Runs m = 6, n = 8, ks = 3 with a_offset set to 37 for k in 1, 3, 5.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(37)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40109
// Same setup as a_offset (m = 6, n = 8, ks = 3, a_offset = 37), additionally
// passing each value 0..5 to zero_index for every k in 1, 3, 5.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(37)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40128
// Single configuration: m = 6, n = 8, k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
40141
// Single configuration: m = 6, n = 8, k = 1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
40154
// Single configuration: m = 6, n = 8, k = 1 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
40167 #endif // XNN_ARCH_WASMRELAXEDSIMD
40168
40169
40170 #if XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 6, n = 8 (equal to mr and nr) with k = 4.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
40182
// Single configuration: m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
40195
// With k = 4, tries every n in [1, 8] and m in [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40212
// With k = 4 and n = 8, tries every m in [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40227
// With k = 4 and m = 6, tries every n in [1, 8], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40242
// Sweeps k over 1..3 with m = 6 and n = 8.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40256
// Sweeps k over 1..3 and, for each k, every n in [1, 8] and m in [1, 6],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40275
// Sweeps k over 5..7 with m = 6 and n = 8.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40289
// Sweeps k over 5..7 and, for each k, every n in [1, 8] and m in [1, 6],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40308
// Sweeps k = 8, 12, ..., 40 with m = 6 and n = 8.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40322
// Sweeps k = 8, 12, ..., 40 and, for each k, every n in [1, 8] and m in [1, 6],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40341
// Sweeps n over 9..15 (above nr = 8) and k over 1, 6, 11, 16 with m fixed at 6.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40357
// Same sweep as n_gt_8 (n in 9..15, k in 1, 6, 11, 16, m = 6), with cn_stride
// set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40374
// For n in 9..15 and k in 1, 6, 11, 16, tries every m in [1, 6] with a single
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40393
// Uses n = 16 and n = 24 (multiples of nr = 8) with k in 1, 6, 11, 16 and m = 6.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40409
// Same sweep as n_div_8 (n = 16, 24; k in 1, 6, 11, 16; m = 6), with cn_stride
// set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40426
// For n = 16, 24 and k in 1, 6, 11, 16, tries every m in [1, 6] with a single
// iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40445
// Runs m = 6, n = 8 with ks set to 3 for k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40460
// With ks set to 3, sweeps k in 1, 6, 11, 16, n in [1, 8] and m in [1, 6],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40480
// With ks set to 3 and m = 6, sweeps n over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40497
// With ks set to 3 and m = 6, uses n = 16, 24 and k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40514
// With cm_stride set to 11, sweeps k in 1, 6, 11, 16, n in [1, 8] and
// m in [1, 6], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40534
// Runs m = 6, n = 8, ks = 3 with a_offset set to 127 for k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40550
// Same setup as a_offset (m = 6, n = 8, ks = 3, a_offset = 127), additionally
// passing each value 0..5 to zero_index for every k in 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40569
// Single configuration: m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
40582
// Single configuration: m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
40595
// Single configuration: m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
40608 #endif // XNN_ARCH_WASMRELAXEDSIMD
40609
40610
40611 #if XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 6, n = 8 (equal to mr and nr) with k = 4.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
}
40623
// Single configuration: m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
}
40636
// With k = 4, tries every n in [1, 8] and m in [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40653
// With k = 4 and n = 8, tries every m in [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
40668
// k = 4, m = 6; sweeps n over [1, 8], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(cur_n).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
40683
// m = 6, n = 8; sweeps k over 1, 2, 3.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
40697
// Sweeps k over 1..3 and, for each, every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40716
// m = 6, n = 8; sweeps k over 5, 6, 7.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
40730
// Sweeps k over 5..7 and, for each, every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40749
// m = 6, n = 8; sweeps k over 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
40763
// Sweeps k over 8..40 in steps of 4 and, for each, every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40782
// m = 6; sweeps n over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40798
// m = 6, cn_stride = 11; sweeps n over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40815
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40834
// m = 6; sweeps n over 16 and 24, and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40850
// m = 6, cn_stride = 11; sweeps n over 16 and 24, and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40867
// Sweeps n over 16 and 24, k over 1, 6, 11, 16 and m over [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40886
// m = 6, n = 8, ks = 3; sweeps k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
40901
// ks = 3; sweeps k over 1, 6, 11, 16 and every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40921
// m = 6, ks = 3; sweeps n over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40938
// m = 6, ks = 3; sweeps n over 16 and 24, and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
40955
// cm_stride = 11; sweeps k over 1, 6, 11, 16 and every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
40975
// m = 6, n = 8, ks = 3, a_offset = 127; sweeps k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k).ks(3).a_offset(127);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
40991
// ks = 3, a_offset = 127; sweeps k over 1, 6, 11, 16 and zero_index over 0..5.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(8).k(cur_k).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41010
// m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
41023
// m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
41036
// m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
41049 #endif // XNN_ARCH_WASMRELAXEDSIMD
41050
41051
41052 #if XNN_ARCH_WASMRELAXEDSIMD
// m = 6 and n = 8 (the same values passed to mr/nr) with k = 4.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
41064
// m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
41077
// k = 4; sweeps every (n, m) pair with n in [1, 8] and m in [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(cur_m).n(cur_n).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41094
// k = 4, n = 8; sweeps m over [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(cur_m).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
41109
// k = 4, m = 6; sweeps n over [1, 8], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(cur_n).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
41124
// m = 6, n = 8; sweeps k over 1, 2, 3.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
41138
// Sweeps k over 1..3 and, for each, every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
41157
// m = 6, n = 8; sweeps k over 5, 6, 7.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
41171
// Sweeps k over 5..7 and, for each, every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
41190
// m = 6, n = 8; sweeps k over 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
41204
// Sweeps k over 8..40 in steps of 4 and, for each, every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
41223
// m = 6; sweeps n over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41239
// m = 6, cn_stride = 11; sweeps n over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41256
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
41275
// m = 6; sweeps n over 16 and 24, and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41291
// m = 6, cn_stride = 11; sweeps n over 16 and 24, and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41308
// Sweeps n over 16 and 24, k over 1, 6, 11, 16 and m over [1, 6], one iteration each.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
41327
// m = 6, n = 8, ks = 3; sweeps k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
41342
// ks = 3; sweeps k over 1, 6, 11, 16 and every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
41362
// m = 6, ks = 3; sweeps n over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41379
// m = 6, ks = 3; sweeps n over 16 and 24, and k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41396
// cm_stride = 11; sweeps k over 1, 6, 11, 16 and every (n, m) pair with n in [1, 8] and m in [1, 6].
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
41416
// m = 6, n = 8, ks = 3, a_offset = 127; sweeps k over 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(cur_k).ks(3).a_offset(127);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
41432
// ks = 3, a_offset = 127; sweeps k over 1, 6, 11, 16 and zero_index over 0..5.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(8).k(cur_k).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
41451
// m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
41464
// m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
41477
// m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
41490 #endif // XNN_ARCH_WASMRELAXEDSIMD
41491
41492
41493 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// m = 2 and n = 4 (the same values passed to mr/nr) with k = 1.
TEST(F32_IGEMM_MINMAX_2X4__WASM, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
41505
// m = 2, n = 4, k = 1 with cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_2X4__WASM, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).cn_stride(7);
  tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
41518
// k = 1; sweeps every (n, m) pair with n in [1, 4] and m in [1, 2], one iteration each.
TEST(F32_IGEMM_MINMAX_2X4__WASM, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41535
// k = 1, n = 4; sweeps m over [1, 2], one iteration each.
TEST(F32_IGEMM_MINMAX_2X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(cur_m).n(4).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
41550
// k = 1, m = 2; sweeps n over [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_2X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(cur_n).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
41565
// m = 2, n = 4; sweeps k over 2..9.
TEST(F32_IGEMM_MINMAX_2X4__WASM, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(cur_k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
41579
// Sweeps k over 2..9 and, for each, every (n, m) pair with n in [1, 4] and m in [1, 2].
TEST(F32_IGEMM_MINMAX_2X4__WASM, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
41598
// m = 2; sweeps n over 5..7 and k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_2X4__WASM, n_gt_4) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41614
// m = 2, cn_stride = 7; sweeps n over 5..7 and k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_2X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cur_n).k(cur_k).cn_stride(7);
      tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41631
// Sweeps n over 5..7, k over 1, 3, 5 and m over [1, 2], one iteration each.
TEST(F32_IGEMM_MINMAX_2X4__WASM, n_gt_4_subtile) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
41650
// m = 2; sweeps n over 8 and 12, and k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_2X4__WASM, n_div_4) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41666
// m = 2, cn_stride = 7; sweeps n over 8 and 12, and k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_2X4__WASM, n_div_4_strided_cn) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cur_n).k(cur_k).cn_stride(7);
      tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41683
// Sweeps n over 8 and 12, k over 1, 3, 5 and m over [1, 2], one iteration each.
TEST(F32_IGEMM_MINMAX_2X4__WASM, n_div_4_subtile) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
41702
// m = 2, n = 4, ks = 3; sweeps k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_2X4__WASM, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(cur_k).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
41717
// ks = 3; sweeps k over 1, 3, 5 and every (n, m) pair with n in [1, 4] and m in [1, 2].
TEST(F32_IGEMM_MINMAX_2X4__WASM, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
41737
// m = 2, ks = 3; sweeps n over 5..7 and k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_2X4__WASM, n_gt_4_small_kernel) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41754
// m = 2, ks = 3; sweeps n over 8 and 12, and k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_2X4__WASM, n_div_4_small_kernel) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41771
// cm_stride = 7; sweeps k over 1, 3, 5 and every (n, m) pair with n in [1, 4] and m in [1, 2].
TEST(F32_IGEMM_MINMAX_2X4__WASM, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).cm_stride(7).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
41791
// m = 2, n = 4, ks = 3, a_offset = 13; sweeps k over 1, 3, 5.
TEST(F32_IGEMM_MINMAX_2X4__WASM, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(cur_k).ks(3).a_offset(13);
    tester.Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
41807
// Sweeps k in {1, 3, 5} and zero_index in [0, 2) with m = 2, n = 4,
// ks = 3 and a_offset = 13.
TEST(F32_IGEMM_MINMAX_2X4__WASM, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1)
          .m(2).n(4).k(k_val)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41826
// Runs the kernel once with m = 2, n = 4, k = 1 and qmin = 128.
TEST(F32_IGEMM_MINMAX_2X4__WASM, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
41839
// Runs the kernel once with m = 2, n = 4, k = 1 and qmax = 128.
TEST(F32_IGEMM_MINMAX_2X4__WASM, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
41852
// Runs the kernel once with m = 2, n = 4, k = 1 and cm_stride = 7.
TEST(F32_IGEMM_MINMAX_2X4__WASM, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
41865 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41866
41867
41868 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with m = 4, n = 2 and k = 1.
TEST(F32_IGEMM_MINMAX_4X2__WASM, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
}
41880
// Runs the kernel once with m = 4, n = 2, k = 1 and cn_stride = 5.
TEST(F32_IGEMM_MINMAX_4X2__WASM, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
}
41893
// Sweeps n in [1, 2] and m in [1, 4] at k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__WASM, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41910
// Sweeps m in [1, 4] with n = 2 and k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(m_val).n(2).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
41925
// Sweeps n in [1, 2] with m = 4 and k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
41940
// Sweeps k in [2, 10) with m = 4 and n = 2.
TEST(F32_IGEMM_MINMAX_4X2__WASM, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
41954
// Sweeps k in [2, 10), n in [1, 2] and m in [1, 4],
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__WASM, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
41973
// Sweeps n in [3, 4) (i.e. n = 3 only) and k in {1, 3, 5} with m = 4.
TEST(F32_IGEMM_MINMAX_4X2__WASM, n_gt_2) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
41989
// Sweeps n in [3, 4) (i.e. n = 3 only) and k in {1, 3, 5} with m = 4
// and cn_stride = 5.
TEST(F32_IGEMM_MINMAX_4X2__WASM, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42006
// Sweeps n in [3, 4) (i.e. n = 3 only), k in {1, 3, 5} and m in [1, 4],
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__WASM, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42025
// Sweeps n in {4, 6} and k in {1, 3, 5} with m = 4.
TEST(F32_IGEMM_MINMAX_4X2__WASM, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42041
// Sweeps n in {4, 6} and k in {1, 3, 5} with m = 4 and cn_stride = 5.
TEST(F32_IGEMM_MINMAX_4X2__WASM, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42058
// Sweeps n in {4, 6}, k in {1, 3, 5} and m in [1, 4],
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__WASM, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42077
// Sweeps k in {1, 3, 5} with m = 4, n = 2 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X2__WASM, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
42092
// Sweeps k in {1, 3, 5}, n in [1, 2] and m in [1, 4] with ks = 3,
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__WASM, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42112
// Sweeps n in [3, 4) (i.e. n = 3 only) and k in {1, 3, 5} with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X2__WASM, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42129
// Sweeps n in {4, 6} and k in {1, 3, 5} with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X2__WASM, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42146
// Sweeps k in {1, 3, 5}, n in [1, 2] and m in [1, 4] with cm_stride = 5,
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__WASM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42166
// Sweeps k in {1, 3, 5} with m = 4, n = 2, ks = 3 and a_offset = 23.
TEST(F32_IGEMM_MINMAX_4X2__WASM, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_val)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
42182
// Sweeps k in {1, 3, 5} and zero_index in [0, 4) with m = 4, n = 2,
// ks = 3 and a_offset = 23.
TEST(F32_IGEMM_MINMAX_4X2__WASM, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(k_val)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42201
// Runs the kernel once with m = 4, n = 2, k = 1 and qmin = 128.
TEST(F32_IGEMM_MINMAX_4X2__WASM, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
}
42214
// Runs the kernel once with m = 4, n = 2, k = 1 and qmax = 128.
TEST(F32_IGEMM_MINMAX_4X2__WASM, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
}
42227
// Runs the kernel once with m = 4, n = 2, k = 1 and cm_stride = 5.
TEST(F32_IGEMM_MINMAX_4X2__WASM, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
}
42240 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42241
42242
42243 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with m = 4, n = 4 and k = 1.
TEST(F32_IGEMM_MINMAX_4X4__WASM, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
42255
// Runs the kernel once with m = 4, n = 4, k = 1 and cn_stride = 7.
TEST(F32_IGEMM_MINMAX_4X4__WASM, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
42268
// Sweeps n in [1, 4] and m in [1, 4] at k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__WASM, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42285
// Sweeps m in [1, 4] with n = 4 and k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1)
        .m(m_val).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
42300
// Sweeps n in [1, 4] with m = 4 and k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
42315
// Sweeps k in [2, 10) with m = 4 and n = 4.
TEST(F32_IGEMM_MINMAX_4X4__WASM, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
42329
// Sweeps k in [2, 10), n in [1, 4] and m in [1, 4],
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__WASM, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42348
// Sweeps n in [5, 8) and k in {1, 3, 5} with m = 4.
TEST(F32_IGEMM_MINMAX_4X4__WASM, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42364
// Sweeps n in [5, 8) and k in {1, 3, 5} with m = 4 and cn_stride = 7.
TEST(F32_IGEMM_MINMAX_4X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42381
// Sweeps n in [5, 8), k in {1, 3, 5} and m in [1, 4],
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__WASM, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42400
// Sweeps n in {8, 12} and k in {1, 3, 5} with m = 4.
TEST(F32_IGEMM_MINMAX_4X4__WASM, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42416
// Sweeps n in {8, 12} and k in {1, 3, 5} with m = 4 and cn_stride = 7.
TEST(F32_IGEMM_MINMAX_4X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42433
// Sweeps n in {8, 12}, k in {1, 3, 5} and m in [1, 4],
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__WASM, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42452
// Sweeps k in {1, 3, 5} with m = 4, n = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X4__WASM, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
42467
// Sweeps k in {1, 3, 5}, n in [1, 4] and m in [1, 4] with ks = 3,
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__WASM, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42487
// Sweeps n in [5, 8) and k in {1, 3, 5} with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X4__WASM, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42504
// Sweeps n in {8, 12} and k in {1, 3, 5} with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X4__WASM, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42521
// Sweeps k in {1, 3, 5}, n in [1, 4] and m in [1, 4] with cm_stride = 7,
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__WASM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42541
// Sweeps k in {1, 3, 5} with m = 4, n = 4, ks = 3 and a_offset = 23.
TEST(F32_IGEMM_MINMAX_4X4__WASM, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
42557
// Sweeps k in {1, 3, 5} and zero_index in [0, 4) with m = 4, n = 4,
// ks = 3 and a_offset = 23.
TEST(F32_IGEMM_MINMAX_4X4__WASM, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1)
          .m(4).n(4).k(k_val)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42576
// Runs the kernel once with m = 4, n = 4, k = 1 and qmin = 128.
TEST(F32_IGEMM_MINMAX_4X4__WASM, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
42589
// Runs the kernel once with m = 4, n = 4, k = 1 and qmax = 128.
TEST(F32_IGEMM_MINMAX_4X4__WASM, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
42602
// Runs the kernel once with m = 4, n = 4, k = 1 and cm_stride = 7.
TEST(F32_IGEMM_MINMAX_4X4__WASM, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
42615 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42616
42617
// Runs the kernel once with m = 1, n = 4 and k = 1.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
42629
// Runs the kernel once with m = 1, n = 4, k = 1 and cn_stride = 7.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
42642
// Sweeps n in [1, 4] and m in [1, 1] (a single value) at k = 1,
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42659
// Sweeps m in [1, 1] (a single value) with n = 4 and k = 1,
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(m_val).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
42674
// Sweeps n in [1, 4] with m = 1 and k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
42689
// Sweeps k in [2, 10) with m = 1 and n = 4.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
42703
// Sweeps k in [2, 10), n in [1, 4] and m in [1, 1] (a single value),
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42722
// Sweeps n in [5, 8) and k in {1, 3, 5} with m = 1.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42738
// Sweeps n in [5, 8) and k in {1, 3, 5} with m = 1 and cn_stride = 7.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42755
// Sweeps n in [5, 8), k in {1, 3, 5} and m in [1, 1] (a single value),
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42774
// Sweeps n in {8, 12} and k in {1, 3, 5} with m = 1.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42790
// Sweeps n in {8, 12} and k in {1, 3, 5} with m = 1 and cn_stride = 7.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42807
// Sweeps n in {8, 12}, k in {1, 3, 5} and m in [1, 1] (a single value),
// running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42826
// Sweeps k in {1, 3, 5} with m = 1, n = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
42841
// Sweeps k in {1, 3, 5}, n in [1, 4] and m in [1, 1] (a single value)
// with ks = 3, running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42861
// Sweeps n in [5, 8) and k in {1, 3, 5} with m = 1 and ks = 3.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42878
// Sweeps n in {8, 12} and k in {1, 3, 5} with m = 1 and ks = 3.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42895
// Sweeps k in {1, 3, 5}, n in [1, 4] and m in [1, 1] (a single value)
// with cm_stride = 7, running a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
42915
// Sweeps k in {1, 3, 5} with m = 1, n = 4, ks = 3 and a_offset = 7.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
42931
// Sweeps k in {1, 3, 5} and zero_index in [0, 1) (i.e. 0 only) with m = 1,
// n = 4, ks = 3 and a_offset = 7.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(k_val)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
42950
// Runs the kernel once with m = 1, n = 4, k = 1 and qmin = 128.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
42963
// Runs the kernel once with m = 1, n = 4, k = 1 and qmax = 128.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
42976
// Runs the kernel once with m = 1, n = 4, k = 1 and cm_stride = 7.
TEST(F32_IGEMM_MINMAX_1X4__SCALAR, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
42989
42990
// Runs the kernel once with m = 4, n = 2 and k = 1.
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
}
43002
// Runs the kernel once with m = 4, n = 2, k = 1 and cn_stride = 5.
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
}
43015
// Sweeps n in [1, 2] and m in [1, 4] at k = 1, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
43032
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
  // Sweeps m over [1, 4] with n = 2 and k = 1, with iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_dim).n(2).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43047
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
  // Sweeps n over [1, 2] with m = 4 and k = 1, with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43062
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_gt_1) {
  // Sweeps k over [2, 9] with m = 4 and n = 2.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43076
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
  // Sweeps k over [2, 9], n over [1, 2] and m over [1, 4], with iterations set to 1.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43095
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_gt_2) {
  // n takes the single value 3 (loop bound n < 4); k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43111
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
  // n takes the single value 3; k takes 1, 3, 5; m = 4; cn_stride set to 5.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43128
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
  // n takes the single value 3; k takes 1, 3, 5; m sweeps [1, 4]; iterations set to 1.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43147
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_div_2) {
  // n takes 4 and 6 (step 2); k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43163
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
  // n takes 4 and 6 (step 2); k takes 1, 3, 5; m = 4; cn_stride set to 5.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43180
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
  // n takes 4 and 6 (step 2); k takes 1, 3, 5; m sweeps [1, 4]; iterations set to 1.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43199
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, small_kernel) {
  // k takes 1, 3, 5 with m = 4, n = 2 and ks set to 3.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43214
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, small_kernel_subtile) {
  // k takes 1, 3, 5; n sweeps [1, 2]; m sweeps [1, 4]; ks = 3; iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43234
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_small_kernel) {
  // n takes the single value 3; k takes 1, 3, 5; m = 4; ks set to 3.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43251
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_div_2_small_kernel) {
  // n takes 4 and 6 (step 2); k takes 1, 3, 5; m = 4; ks set to 3.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43268
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
  // k takes 1, 3, 5; n sweeps [1, 2]; m sweeps [1, 4]; cm_stride = 5; iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43288
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, a_offset) {
  // k takes 1, 3, 5 with m = 4, n = 2, ks = 3 and a_offset set to 23.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_dim)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43304
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, zero) {
  // k takes 1, 3, 5; zero_index sweeps [0, 3]; ks = 3 and a_offset = 23 throughout.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(k_dim)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43323
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, qmin) {
  // Single run with m = 4, n = 2, k = 1 and qmin set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
            xnn_init_f32_minmax_scalar_params);
}
43336
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, qmax) {
  // Single run with m = 4, n = 2, k = 1 and qmax set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
            xnn_init_f32_minmax_scalar_params);
}
43349
TEST(F32_IGEMM_MINMAX_4X2__SCALAR, strided_cm) {
  // Single run with m = 4, n = 2, k = 1 and cm_stride set to 5.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar,
            xnn_init_f32_minmax_scalar_params);
}
43362
43363
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_eq_1) {
  // Single run with m = mr = 4, n = nr = 4 and k = 1.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
            xnn_init_f32_minmax_scalar_params);
}
43375
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, strided_cn) {
  // Single run with m = 4, n = 4, k = 1 and cn_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
            xnn_init_f32_minmax_scalar_params);
}
43388
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
  // Sweeps n over [1, 4] and m over [1, 4] at k = 1, with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43405
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
  // Sweeps m over [1, 4] with n = 4 and k = 1, with iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43420
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
  // Sweeps n over [1, 4] with m = 4 and k = 1, with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43435
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_gt_1) {
  // Sweeps k over [2, 9] with m = 4 and n = 4.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43449
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
  // Sweeps k over [2, 9], n over [1, 4] and m over [1, 4], with iterations set to 1.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43468
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_gt_4) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43484
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4; cn_stride set to 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43501
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m sweeps [1, 4]; iterations set to 1.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43520
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_div_4) {
  // n takes 8 and 12 (step 4); k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43536
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
  // n takes 8 and 12 (step 4); k takes 1, 3, 5; m = 4; cn_stride set to 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43553
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
  // n takes 8 and 12 (step 4); k takes 1, 3, 5; m sweeps [1, 4]; iterations set to 1.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43572
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, small_kernel) {
  // k takes 1, 3, 5 with m = 4, n = 4 and ks set to 3.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43587
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, small_kernel_subtile) {
  // k takes 1, 3, 5; n sweeps [1, 4]; m sweeps [1, 4]; ks = 3; iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43607
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_small_kernel) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4; ks set to 3.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43624
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_div_4_small_kernel) {
  // n takes 8 and 12 (step 4); k takes 1, 3, 5; m = 4; ks set to 3.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43641
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
  // k takes 1, 3, 5; n sweeps [1, 4]; m sweeps [1, 4]; cm_stride = 7; iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43661
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, a_offset) {
  // k takes 1, 3, 5 with m = 4, n = 4, ks = 3 and a_offset set to 23.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
              xnn_init_f32_minmax_scalar_params);
  }
}
43677
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, zero) {
  // k takes 1, 3, 5; zero_index sweeps [0, 3]; ks = 3 and a_offset = 23 throughout.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43696
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, qmin) {
  // Single run with m = 4, n = 4, k = 1 and qmin set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
            xnn_init_f32_minmax_scalar_params);
}
43709
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, qmax) {
  // Single run with m = 4, n = 4, k = 1 and qmax set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
            xnn_init_f32_minmax_scalar_params);
}
43722
TEST(F32_IGEMM_MINMAX_4X4__SCALAR, strided_cm) {
  // Single run with m = 4, n = 4, k = 1 and cm_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar,
            xnn_init_f32_minmax_scalar_params);
}
43735
43736
43737 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m = mr = 4, n = nr = 8 and k = 4.
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
43750
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m = 4, n = 8, k = 4 and cn_stride set to 11.
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
43764
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps n over [1, 8] and m over [1, 4] at k = 4, with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43782
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps m over [1, 4] with n = 8 and k = 4, with iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
43798
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps n over [1, 8] with m = 4 and k = 4, with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
43814
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m = 4, n = 8 and k = 8.
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
43827
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps n over [1, 8] and m over [1, 4] at k = 8, with iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43845
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over [1, 7] with m = 4 and n = 8.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
43860
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over [1, 7], n over [1, 8] and m over [1, 4], with iterations set to 1.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43880
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over [9, 15] with m = 4 and n = 8.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
43895
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over [9, 15], n over [1, 8] and m over [1, 4], with iterations set to 1.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43915
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  // k runs from 12 to 40 in steps of 4, with m = 4 and n = 8.
  for (size_t k_dim = 12; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
43930
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k runs from 12 to 40 in steps of 4; n sweeps [1, 8]; m sweeps [1, 4]; iterations set to 1.
  for (size_t k_dim = 12; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
43950
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n sweeps [9, 15]; k takes 1, 6, 11, 16; m = 4.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43967
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n sweeps [9, 15]; k takes 1, 6, 11, 16; m = 4; cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
43985
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n sweeps [9, 15]; k takes 1, 6, 11, 16; m sweeps [1, 4]; iterations set to 1.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44005
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n takes 16 and 24 (step 8); k takes 1, 6, 11, 16; m = 4.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
44022
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n takes 16 and 24 (step 8); k takes 1, 6, 11, 16; m = 4; cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
44040
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n takes 16 and 24 (step 8); k takes 1, 6, 11, 16; m sweeps [1, 4]; iterations set to 1.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44060
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // k takes 1, 6, 11, 16 with m = 4, n = 8 and ks set to 3.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
44076
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k takes 1, 6, 11, 16; n sweeps [1, 8]; m sweeps [1, 4]; ks = 3; iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44097
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n sweeps [9, 15]; k takes 1, 6, 11, 16; m = 4; ks set to 3.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
44115
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n takes 16 and 24 (step 8); k takes 1, 6, 11, 16; m = 4; ks set to 3.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
44133
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k takes 1, 6, 11, 16; n sweeps [1, 8]; m sweeps [1, 4]; cm_stride = 11; iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44154
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // k takes 1, 6, 11, 16 with m = 4, n = 8, ks = 3 and a_offset set to 83.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
44171
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  // k takes 1, 6, 11, 16; zero_index sweeps [0, 3]; ks = 3 and a_offset = 83 throughout.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_dim)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
44191
// Single run: m = 4, n = 8, k = 4 with qmin(128).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
44205
// Single run: m = 4, n = 8, k = 4 with qmax(128).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
44219
// Single run: m = 4, n = 8, k = 4 with cm_stride(11).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
44233 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
44234
44235
44236 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Single run: m = 4, n = 8, k = 4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44249
// Single run: m = 4, n = 8, k = 4 with cn_stride(11).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44263
// Sweeps n over 1..8 and m over 1..4 at k = 4; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(4);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44281
// Sweeps m over 1..4 at n = 8, k = 4; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m_dim).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44297
// Sweeps n over 1..8 at m = 4, k = 4; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n_dim).k(4);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44313
// Single run: m = 4, n = 8, k = 8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44326
// Sweeps n over 1..8 and m over 1..4 at k = 8; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44344
// Sweeps k over 1..7 at m = 4, n = 8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44359
// Sweeps k over 1..7, n over 1..8 and m over 1..4; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44379
// Sweeps k over 9..15 at m = 4, n = 8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44394
// Sweeps k over 9..15, n over 1..8 and m over 1..4; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44414
// Sweeps k over 12, 16, ..., 40 (step 4) at m = 4, n = 8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 12; k_dim <= 40; k_dim += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44429
// Sweeps k over 12, 16, ..., 40 (step 4), n over 1..8 and m over 1..4;
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 12; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44449
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m = 4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44466
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m = 4 with cn_stride(11).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44484
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..4;
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44504
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m = 4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44521
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m = 4 with cn_stride(11).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44539
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..4;
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44559
// Sweeps k over {1, 6, 11, 16} at m = 4, n = 8 with ks(3).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.ks(3);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44575
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..4 with ks(3);
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44596
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m = 4 with ks(3).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44614
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m = 4 with ks(3).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44632
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..4 with cm_stride(11);
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44653
// Sweeps k over {1, 6, 11, 16} at m = 4, n = 8 with ks(3) and a_offset(83).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim).ks(3);
    tester.a_offset(83);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44670
// Sweeps k over {1, 6, 11, 16} and zero_index over 0..3 at m = 4, n = 8
// with ks(3) and a_offset(83).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(k_dim).ks(3);
      tester.a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44690
// Single run: m = 4, n = 8, k = 4 with qmin(128).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44704
// Single run: m = 4, n = 8, k = 4 with qmax(128).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44718
// Single run: m = 4, n = 8, k = 4 with cm_stride(11).
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44732 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
44733
44734
44735 #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
// Single run: m = 6, n = 8, k = 8.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44748
// Single run: m = 6, n = 8, k = 8 with cn_stride(11).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44762
// Sweeps n over 1..8 and m over 1..6 at k = 8; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44780
// Sweeps m over 1..6 at n = 8, k = 8; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(m_dim).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44796
// Sweeps n over 1..8 at m = 6, k = 8; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44812
// Single run: m = 6, n = 8, k = 16.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(16);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
44825
// Sweeps n over 1..8 and m over 1..6 at k = 16; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(16);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44843
// Sweeps k over 1..15 at m = 6, n = 8.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_dim);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44858
// Sweeps k over 1..15, n over 1..8 and m over 1..6; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44878
// Sweeps k over 17..31 at m = 6, n = 8.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_dim);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44893
// Sweeps k over 17..31, n over 1..8 and m over 1..6; one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44913
// Sweeps k over 24, 32, ..., 80 (step 8) at m = 6, n = 8.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_dim);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
44928
// Sweeps k over 24, 32, ..., 80 (step 8), n over 1..8 and m over 1..6;
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
44948
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} at m = 6.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_dim).k(k_dim);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44965
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} at m = 6 with cn_stride(11).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
44983
// Sweeps n over 9..15, k over {1, 10, 19, 28, 37} and m over 1..6;
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45003
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} at m = 6.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_dim).k(k_dim);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45020
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} at m = 6 with cn_stride(11).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45038
// Sweeps n over {16, 24}, k over {1, 10, 19, 28, 37} and m over 1..6;
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45058
// Sweeps k over {1, 10, 19, 28, 37} at m = 6, n = 8 with ks(3).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_dim);
    tester.ks(3);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45074
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..8 and m over 1..6 with ks(3);
// one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45095
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} at m = 6 with ks(3).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45113
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} at m = 6 with ks(3).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45131
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..8 and m over 1..6 with
// cm_stride(11); one iteration per configuration.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45152
// Sweeps k over {1, 10, 19, 28, 37} at m = 6, n = 8 with ks(3) and a_offset(251).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_dim).ks(3);
    tester.a_offset(251);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45169
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..5 at m = 6, n = 8
// with ks(3) and a_offset(251).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(k_dim).ks(3);
      tester.a_offset(251).zero_index(zero_idx);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45189
// Single run: m = 6, n = 8, k = 8 with qmin(128).
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.qmin(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45203
// Single 6x8 run with k=8 and qmax set to 128.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45217
// Single 6x8 run with k=8 and cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45231
// For every mr in 1..6 and every m in 1..mr, runs one iteration with n=8, k=8.
// Unlike the other tests of this suite, mr itself is varied here.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m_upto_mr) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mr_limit = 1; mr_limit <= 6; ++mr_limit) {
    for (uint32_t mc = 1; mc <= mr_limit; ++mc) {
      GemmMicrokernelTester()
        .mr(mr_limit).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45249 #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
45250
45251
45252 #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
// Single 1x8 run with k=8.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45265
// Single 1x8 run with k=8 and cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45279
// With k=8, runs one iteration for every n in 1..8 and every m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45297
// With n=8 and k=8, runs one iteration for every m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45313
// With m=1 and k=8, runs one iteration for every n in 1..8.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45329
// Single 1x8 run with k=16.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(16)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45342
// With k=16, runs one iteration for every n in 1..8 and every m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45360
// Runs a full 1x8 problem for every k in 1..15.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45375
// For every k in 1..15, runs one iteration for each n in 1..8 and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45395
// Runs a full 1x8 problem for every k in 17..31.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45410
// For every k in 17..31, runs one iteration for each n in 1..8 and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45430
// Runs a full 1x8 problem for k = 24, 32, ..., 80 (step 8).
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45445
// For k = 24, 32, ..., 80, runs one iteration for each n in 1..8 and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45465
// For every n in 9..15, sweeps k over 1, 10, 19, 28, 37 with m=1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45482
// For every n in 9..15, sweeps k over 1, 10, 19, 28, 37 with m=1 and cn_stride=11.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45500
// For every n in 9..15 and k in 1, 10, ..., 37, runs one iteration for each m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45520
// For n = 16 and 24, sweeps k over 1, 10, 19, 28, 37 with m=1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45537
// For n = 16 and 24, sweeps k over 1, 10, 19, 28, 37 with m=1 and cn_stride=11.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45555
// For n = 16 and 24 and k in 1, 10, ..., 37, runs one iteration for each m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45575
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x8 problem with ks=3.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45591
// With ks=3, for every k in 1, 10, ..., 37 runs one iteration for each n in 1..8
// and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45612
// For every n in 9..15, sweeps k over 1, 10, 19, 28, 37 with m=1 and ks=3.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45630
// For n = 16 and 24, sweeps k over 1, 10, 19, 28, 37 with m=1 and ks=3.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45648
// With cm_stride=11, for every k in 1, 10, ..., 37 runs one iteration for each
// n in 1..8 and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45669
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x8 problem with ks=3 and a_offset=43.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45686
// For each k in 1, 10, ..., 37 and each zero_index in 0..0, runs a full 1x8
// problem with ks=3 and a_offset=43.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45706
// Single 1x8 run with k=8 and qmin set to 128.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45720
// Single 1x8 run with k=8 and qmax set to 128.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45734
// Single 1x8 run with k=8 and cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45748 #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
45749
45750
45751 #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
// Single 1x8 run with k=8.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45764
// Single 1x8 run with k=8 and cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45778
// With k=8, runs one iteration for every n in 1..8 and every m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45796
// With n=8 and k=8, runs one iteration for every m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45812
// With m=1 and k=8, runs one iteration for every n in 1..8.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45828
// Single 1x8 run with k=16.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(16)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
45841
// With k=16, runs one iteration for every n in 1..8 and every m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45859
// Runs a full 1x8 problem for every k in 1..15.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45874
// For every k in 1..15, runs one iteration for each n in 1..8 and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45894
// Runs a full 1x8 problem for every k in 17..31.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45909
// For every k in 17..31, runs one iteration for each n in 1..8 and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45929
// Runs a full 1x8 problem for k = 24, 32, ..., 80 (step 8).
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
45944
// For k = 24, 32, ..., 80, runs one iteration for each n in 1..8 and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
45964
// For every n in 9..15, sweeps k over 1, 10, 19, 28, 37 with m=1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45981
// For every n in 9..15, sweeps k over 1, 10, 19, 28, 37 with m=1 and cn_stride=11.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
45999
// For every n in 9..15 and k in 1, 10, ..., 37, runs one iteration for each m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46019
// For n = 16 and 24, sweeps k over 1, 10, 19, 28, 37 with m=1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
46036
// For n = 16 and 24, sweeps k over 1, 10, 19, 28, 37 with m=1 and cn_stride=11.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
46054
// For n = 16 and 24 and k in 1, 10, ..., 37, runs one iteration for each m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46074
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x8 problem with ks=3.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
46090
// With ks=3, for every k in 1, 10, ..., 37 runs one iteration for each n in 1..8
// and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46111
// For every n in 9..15, sweeps k over 1, 10, 19, 28, 37 with m=1 and ks=3.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
46129
// For n = 16 and 24, sweeps k over 1, 10, 19, 28, 37 with m=1 and ks=3.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
46147
// With cm_stride=11, for every k in 1, 10, ..., 37 runs one iteration for each
// n in 1..8 and m in 1..1.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46168
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x8 problem with ks=3 and a_offset=43.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
46185
// For each k in 1, 10, ..., 37 and each zero_index in 0..0, runs a full 1x8
// problem with ks=3 and a_offset=43.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
46205
// Single 1x8 run with k=8 and qmin set to 128.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
46219
// Single 1x8 run with k=8 and qmax set to 128.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
46233
// 1x8 generator, full tile with k=8 and cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
46247 #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
46248
46249
46250 #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
// 4x8 generator, full tile (m=4, n=8) with k=8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
46263
// 4x8 generator, full tile with k=8 and cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
46277
// 4x8 generator with k=8: every (n, m) pair with n in [1, 8] and m in [1, 4],
// one iteration each.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46295
// 4x8 generator with k=8 and n=8: m ranges over [1, 4], one iteration each.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
46311
// 4x8 generator with k=8 and m=4: n ranges over [1, 8], one iteration each.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
46327
// 4x8 generator, full tile (m=4, n=8) with k=16.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(16);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
46340
// 4x8 generator with k=16: every (n, m) pair with n in [1, 8] and m in [1, 4],
// one iteration each.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46358
// 4x8 generator, full tile: k takes every value in [1, 15].
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
46373
// 4x8 generator: k in [1, 15] crossed with n in [1, 8] and m in [1, 4],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46393
// 4x8 generator, full tile: k takes every value in [17, 31].
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
46408
// 4x8 generator: k in [17, 31] crossed with n in [1, 8] and m in [1, 4],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46428
// 4x8 generator, full tile: k steps through 24, 32, ..., 80 (multiples of 8).
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
46443
// 4x8 generator: k in 24, 32, ..., 80 crossed with n in [1, 8] and m in [1, 4],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46463
// 4x8 generator with m=4: n in [9, 15] crossed with k in 1, 10, 19, 28, 37.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46480
// 4x8 generator with m=4 and cn_stride=11: n in [9, 15] crossed with
// k in 1, 10, 19, 28, 37.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46498
// 4x8 generator: n in [9, 15], k in 1, 10, 19, 28, 37 and m in [1, 4],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46518
// 4x8 generator with m=4: n in {16, 24} crossed with k in 1, 10, 19, 28, 37.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46535
// 4x8 generator with m=4 and cn_stride=11: n in {16, 24} crossed with
// k in 1, 10, 19, 28, 37.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46553
// 4x8 generator: n in {16, 24}, k in 1, 10, 19, 28, 37 and m in [1, 4],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46573
// 4x8 generator, full tile with ks=3: k in 1, 10, 19, 28, 37.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc).ks(3);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
46589
// 4x8 generator with ks=3: k in 1, 10, 19, 28, 37 crossed with n in [1, 8]
// and m in [1, 4], one iteration per combination.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46610
// 4x8 generator with m=4 and ks=3: n in [9, 15] crossed with
// k in 1, 10, 19, 28, 37.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46628
// 4x8 generator with m=4 and ks=3: n in {16, 24} crossed with
// k in 1, 10, 19, 28, 37.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46646
// 4x8 generator with cm_stride=11: k in 1, 10, 19, 28, 37 crossed with
// n in [1, 8] and m in [1, 4], one iteration per combination.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46667
// 4x8 generator, full tile: sweeps k over 1, 10, 19, 28, 37 with ks=3 and
// a_offset=163.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc).ks(3);
    tester.a_offset(163);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
46684
// 4x8 generator, full tile: for k in 1, 10, 19, 28, 37 and zero_index in
// [0, 3], runs with ks=3 and a_offset=163.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(kc).ks(3);
      tester.a_offset(163).zero_index(zi);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46704
// 4x8 generator, full tile with k=8 and qmin set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
46718
// 4x8 generator, full tile with k=8 and qmax set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
46732
// 4x8 generator, full tile with k=8 and cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
46746 #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
46747
46748
46749 #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
// 6x8 ld128 generator, full tile (m=6, n=8) with k=4.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
      xnn_init_f32_minmax_scalar_params);
}
46762
// 6x8 ld128 generator, full tile with k=4 and cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
      xnn_init_f32_minmax_scalar_params);
}
46776
// 6x8 ld128 generator with k=4: every (n, m) pair with n in [1, 8] and
// m in [1, 6], one iteration each.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(4);
      tester.iterations(1);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46794
// 6x8 ld128 generator with k=4 and n=8: m ranges over [1, 6], one iteration
// each.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(4);
    tester.iterations(1);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
46810
// 6x8 ld128 generator with k=4 and m=6: n ranges over [1, 8], one iteration
// each.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(nc).k(4);
    tester.iterations(1);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
46826
// 6x8 ld128 generator, full tile: k takes every value in [1, 3].
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
46841
// 6x8 ld128 generator: k in [1, 3] crossed with n in [1, 8] and m in [1, 6],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46861
// 6x8 ld128 generator, full tile: k takes every value in [5, 7].
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
46876
// 6x8 ld128 generator: k in [5, 7] crossed with n in [1, 8] and m in [1, 6],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46896
// 6x8 ld128 generator, full tile: k steps through 8, 12, ..., 40
// (multiples of 4).
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
46911
// 6x8 ld128 generator: k in 8, 12, ..., 40 crossed with n in [1, 8] and
// m in [1, 6], one iteration per combination.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46931
// 6x8 ld128 generator with m=6: n in [9, 15] crossed with k in 1, 6, 11, 16.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46948
// 6x8 ld128 generator with m=6 and cn_stride=11: n in [9, 15] crossed with
// k in 1, 6, 11, 16.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
46966
// 6x8 ld128 generator: n in [9, 15], k in 1, 6, 11, 16 and m in [1, 6],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
46986
// 6x8 ld128 generator with m=6: n in {16, 24} crossed with k in 1, 6, 11, 16.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
47003
// 6x8 ld128 generator with m=6 and cn_stride=11: n in {16, 24} crossed with
// k in 1, 6, 11, 16.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
47021
// 6x8 ld128 generator: n in {16, 24}, k in 1, 6, 11, 16 and m in [1, 6],
// one iteration per combination.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
47041
// 6x8 ld128 generator, full tile with ks=3: k in 1, 6, 11, 16.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc).ks(3);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
47057
// 6x8 ld128 generator with ks=3: k in 1, 6, 11, 16 crossed with n in [1, 8]
// and m in [1, 6], one iteration per combination.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
47078
// 6x8 ld128 generator with m=6 and ks=3: n in [9, 15] crossed with
// k in 1, 6, 11, 16.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
47096
// 6x8 ld128 generator with m=6 and ks=3: n in {16, 24} crossed with
// k in 1, 6, 11, 16.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
47114
// 6x8 ld128 generator with cm_stride=11: k in 1, 6, 11, 16 crossed with
// n in [1, 8] and m in [1, 6], one iteration per combination.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
47135
// 6x8 ld128 generator, full tile: sweeps k over 1, 6, 11, 16 with ks=3 and
// a_offset=127.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc).ks(3);
    tester.a_offset(127);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
47152
// 6x8 ld128 generator, full tile: for k in 1, 6, 11, 16 and zero_index in
// [0, 5], runs with ks=3 and a_offset=127.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(kc).ks(3);
      tester.a_offset(127).zero_index(zi);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
47172
// 6x8 ld128 generator, full tile with k=4 and qmin set to 128.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.qmin(128);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
      xnn_init_f32_minmax_scalar_params);
}
47186
// 6x8 ld128 generator, full tile with k=4 and qmax set to 128.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.qmax(128);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
      xnn_init_f32_minmax_scalar_params);
}
47200
// 6x8 ld128 generator, full tile with k=4 and cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128,
      xnn_init_f32_minmax_scalar_params);
}
47214 #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
47215