1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/f32-gemminc-minmax.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 1, n = 8, k = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
40
// Single configuration: m = 1, n = 8, k = 8, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
54
// Single configuration: m = 1, n = 8, k = 8, a_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
68
// Sweep: n in [1, 8], m in [1, 1], k fixed at 8; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
86
// Sweep: m in [1, 1] with n = 8, k = 8; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
102
// Sweep: n in [1, 8] with m = 1, k = 8; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
118
// Single configuration: m = 1, n = 8, k = 16.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
131
// Single configuration: m = 1, n = 8, k = 16, a_stride = 19.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(16)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
145
// Sweep: n in [1, 8], m in [1, 1], k fixed at 16; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(16)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
163
// Sweep: k in [1, 16) with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
178
// Sweep: k in [1, 16) with m = 1, n = 8, a_stride = 19.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
194
// Sweep: k in [1, 16), n in [1, 8], m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
214
// Sweep: k in [17, 32) with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
229
// Sweep: k in [17, 32) with m = 1, n = 8, a_stride = 37.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(37)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
245
// Sweep: k in [17, 32), n in [1, 8], m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
265
// Sweep: k = 24, 32, ..., 80 (step 8) with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
280
// Sweep: k = 24, 32, ..., 80 (step 8) with m = 1, n = 8, a_stride = 83.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(83)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
296
// Sweep: k = 24, 32, ..., 80 (step 8), n in [1, 8], m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
316
// Sweep: n in [9, 16), k = 1, 10, ..., 37 (step 9) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
333
// Sweep: n in [9, 16), k = 1, 10, ..., 37 (step 9) with m = 1, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
351
// Sweep: n in [9, 16), k = 1, 10, ..., 37 (step 9) with m = 1, a_stride = 43.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
369
// Sweep: n in [9, 16), k = 1, 10, ..., 37 (step 9), m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
389
// Sweep: n = 16, 24 (step 8), k = 1, 10, ..., 37 (step 9) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
406
// Sweep: n = 16, 24 (step 8), k = 1, 10, ..., 37 (step 9) with m = 1, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
424
// Sweep: n = 16, 24 (step 8), k = 1, 10, ..., 37 (step 9) with m = 1, a_stride = 43.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
442
// Sweep: n = 16, 24 (step 8), k = 1, 10, ..., 37 (step 9), m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
462
// Sweep: k = 1, 10, ..., 37 (step 9), n in [1, 8], m in [1, 1] with cm_stride = 11;
// iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
483
// Single configuration: m = 1, n = 8, k = 8, qmin = 128.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
497
// Single configuration: m = 1, n = 8, k = 8, qmax = 128.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
511
// Single configuration: m = 1, n = 8, k = 8, cm_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
525 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
526
527
528 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 1, n = 8, k = 2.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
541
// Single configuration: m = 1, n = 8, k = 2, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
555
// Single configuration: m = 1, n = 8, k = 2, a_stride = 5.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
569
// Sweep: n in [1, 8], m in [1, 1], k fixed at 2; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(2)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
587
// Sweep: m in [1, 1] with n = 8, k = 2; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
603
// Sweep: n in [1, 8] with m = 1, k = 2; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
619
// Sweep: k in [1, 2) (i.e. k = 1 only) with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
634
// Sweep: k in [1, 2) (i.e. k = 1 only) with m = 1, n = 8, a_stride = 5.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(5)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
650
// Sweep: k in [1, 2), n in [1, 8], m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
670
// Sweep: k in [3, 4) (i.e. k = 3 only) with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
685
// Sweep: k in [3, 4) (i.e. k = 3 only) with m = 1, n = 8, a_stride = 7.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
701
// Sweep: k in [3, 4), n in [1, 8], m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
721
// Sweep: k = 4, 6, ..., 20 (step 2) with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
736
// Sweep: k = 4, 6, ..., 20 (step 2) with m = 1, n = 8, a_stride = 23.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
752
// Sweep: k = 4, 6, ..., 20 (step 2), n in [1, 8], m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
772
// Sweep: n in [9, 16), k = 1, 4, 7, 10 (step 3) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
789
// Sweep: n in [9, 16), k = 1, 4, 7, 10 (step 3) with m = 1, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
807
// Sweep: n in [9, 16), k = 1, 4, 7, 10 (step 3) with m = 1, a_stride = 13.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
825
// Sweep: n in [9, 16), k = 1, 4, 7, 10 (step 3), m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
845
// Sweep: n = 16, 24 (step 8), k = 1, 4, 7, 10 (step 3) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
862
// Sweep: n = 16, 24 (step 8), k = 1, 4, 7, 10 (step 3) with m = 1, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
880
// Sweep: n = 16, 24 (step 8), k = 1, 4, 7, 10 (step 3) with m = 1, a_stride = 13.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
898
// Sweep: n = 16, 24 (step 8), k = 1, 4, 7, 10 (step 3), m in [1, 1]; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
918
// Sweep: k = 1, 4, 7, 10 (step 3), n in [1, 8], m in [1, 1] with cm_stride = 11;
// iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
939
// Single configuration: m = 1, n = 8, k = 2, qmin = 128.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
953
// Single configuration: m = 1, n = 8, k = 2, qmax = 128.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
967
// Single configuration: m = 1, n = 8, k = 2, cm_stride = 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
981 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
982
983
984 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 4, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
997
// Single configuration: m = 4, n = 8, k = 4, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1011
// Single configuration: m = 4, n = 8, k = 4, a_stride = 7.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1025
// Sweep: n in [1, 8], m in [1, 4], k fixed at 4; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1043
// Sweep: m in [1, 4] with n = 8, k = 4; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1059
// Sweep: n in [1, 8] with m = 4, k = 4; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1075
// Single configuration: m = 4, n = 8, k = 8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1088
// Single configuration: m = 4, n = 8, k = 8, a_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1102
// Sweep: n in [1, 8], m in [1, 4], k fixed at 8; iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1120
// Sweeps k over 1..7 with m=4 and n=8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1135
// Sweeps k over 1..7 with m=4, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1151
// Sweeps k over 1..7 and, for each k, every (n, m) pair with n in 1..8 and
// m in 1..4; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1171
// Sweeps k over 9..15 with m=4 and n=8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1186
// Sweeps k over 9..15 with m=4, n=8 and a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k).a_stride(19);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1202
// Sweeps k over 9..15 and, for each k, every (n, m) pair with n in 1..8 and
// m in 1..4; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1222
// Sweeps k over 12, 16, ..., 40 (step 4) with m=4 and n=8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 12; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1237
// Sweeps k over 12, 16, ..., 40 (step 4) with m=4, n=8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 12; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
1253
// Sweeps k over 12, 16, ..., 40 (step 4) and, for each k, every (n, m) pair
// with n in 1..8 and m in 1..4; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 12; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1273
// Sweeps n over 9..15 and, for each n, k over 1, 6, 11, 16 (step 5) with m=4.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1290
// Sweeps n over 9..15 and, for each n, k over 1, 6, 11, 16 (step 5) with m=4
// and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1308
// Sweeps n over 9..15 and, for each n, k over 1, 6, 11, 16 (step 5) with m=4
// and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1326
// Sweeps n over 9..15, k over 1, 6, 11, 16 (step 5) and m over 1..4, nested in
// that order; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1346
// Runs n = 16 and 24 and, for each n, k over 1, 6, 11, 16 (step 5) with m=4.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1363
// Runs n = 16 and 24 and, for each n, k over 1, 6, 11, 16 (step 5) with m=4
// and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1381
// Runs n = 16 and 24 and, for each n, k over 1, 6, 11, 16 (step 5) with m=4
// and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1399
// Runs n = 16 and 24, k over 1, 6, 11, 16 (step 5) and m over 1..4, nested in
// that order; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1419
// Sweeps k over 1, 6, 11, 16 (step 5), n over 1..8 and m over 1..4, nested in
// that order, with cm_stride set to 11; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1440
// Single run with m=4, n=8, k=4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1454
// Single run with m=4, n=8, k=4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1468
// Single run with m=4, n=8, k=4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1482 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1483
1484
1485 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m=5, n=8 (matching mr and nr) and k=8.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(8);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1498
// Single run with m=5, n=8, k=8 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(8).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1512
// Single run with m=5, n=8, k=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(8).a_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1526
// Sweeps every (n, m) pair with n in 1..8 and m in 1..5 at k=8;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(8).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1544
// Sweeps m over 1..5 with n=8 and k=8; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(cur_m).n(8).k(8).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1560
// Sweeps n over 1..8 with m=5 and k=8; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(cur_n).k(8).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1576
// Single run with m=5, n=8 and k=16.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(16);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1589
// Single run with m=5, n=8, k=16 and a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(16).a_stride(19);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1603
// Sweeps every (n, m) pair with n in 1..8 and m in 1..5 at k=16;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(16).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1621
// Sweeps k over 1..15 with m=5 and n=8.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1636
// Sweeps k over 1..15 with m=5, n=8 and a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k).a_stride(19);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1652
// Sweeps k over 1..15 and, for each k, every (n, m) pair with n in 1..8 and
// m in 1..5; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1672
// Sweeps k over 17..31 with m=5 and n=8.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1687
// Sweeps k over 17..31 with m=5, n=8 and a_stride set to 37.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k).a_stride(37);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1703
// Sweeps k over 17..31 and, for each k, every (n, m) pair with n in 1..8 and
// m in 1..5; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1723
// Sweeps k over 24, 32, ..., 80 (step 8) with m=5 and n=8.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 24; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1738
// Sweeps k over 24, 32, ..., 80 (step 8) with m=5, n=8 and a_stride set to 83.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 24; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(cur_k).a_stride(83);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1754
// Sweeps k over 24, 32, ..., 80 (step 8) and, for each k, every (n, m) pair
// with n in 1..8 and m in 1..5; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 24; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1774
// Sweeps n over 9..15 and, for each n, k over 1, 10, 19, 28, 37 (step 9)
// with m=5.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1791
// Sweeps n over 9..15 and, for each n, k over 1, 10, 19, 28, 37 (step 9)
// with m=5 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1809
// Sweeps n over 9..15 and, for each n, k over 1, 10, 19, 28, 37 (step 9)
// with m=5 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k).a_stride(43);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1827
// Sweeps n over 9..15, k over 1, 10, 19, 28, 37 (step 9) and m over 1..5,
// nested in that order; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1847
// Runs n = 16 and 24 and, for each n, k over 1, 10, 19, 28, 37 (step 9)
// with m=5.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1864
// Runs n = 16 and 24 and, for each n, k over 1, 10, 19, 28, 37 (step 9)
// with m=5 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1882
// Runs n = 16 and 24 and, for each n, k over 1, 10, 19, 28, 37 (step 9)
// with m=5 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(cur_n).k(cur_k).a_stride(43);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1900
// Runs n = 16 and 24, k over 1, 10, 19, 28, 37 (step 9) and m over 1..5,
// nested in that order; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1920
// Sweeps k over 1, 10, 19, 28, 37 (step 9), n over 1..8 and m over 1..5,
// nested in that order, with cm_stride set to 11; each configuration uses
// iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1941
// Single run with m=5, n=8, k=8 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(8).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1955
// Single run with m=5, n=8, k=8 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(8).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1969
// Single run with m=5, n=8, k=8 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(8).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1983 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1984
1985
1986 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m=6, n=8 (matching mr and nr) and k=4.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1999
// Single run with m=6, n=8, k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2013
// Single run with m=6, n=8, k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2027
// Sweeps every (n, m) pair with n in 1..8 and m in 1..6 at k=4;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2045
// Sweeps m over 1..6 with n=8 and k=4; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(cur_m).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2061
// Sweeps n over 1..8 with m=6 and k=4; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(cur_n).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2077
// Single run with m=6, n=8 and k=8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2090
// Single run with m=6, n=8, k=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).a_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
2104
// Sweeps every (n, m) pair with n in 1..8 and m in 1..6 at k=8;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(8).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2122
// Sweeps k over 1..7 with m=6 and n=8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2137
// Sweeps k over 1..7 with m=6, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(cur_k).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
2153
// Sweeps k over 1..7 and, for each k, every (n, m) pair with n in 1..8 and
// m in 1..6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2173
// Runs the m = 6, n = 8 case for every k in [9, 15].
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2188
// Runs the m = 6, n = 8 case for every k in [9, 15] with a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2204
// For each k in [9, 15], sweeps n over [1, 8] and m over [1, 6]; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2224
// Runs the m = 6, n = 8 case for k = 12, 16, ..., 40.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2239
// Runs the m = 6, n = 8 case for k = 12, 16, ..., 40 with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2255
// For k = 12, 16, ..., 40, sweeps n over [1, 8] and m over [1, 6]; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2275
// For each n in [9, 15], runs m = 6 with k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2292
// For each n in [9, 15], runs m = 6 with k = 1, 6, 11, 16 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2310
// For each n in [9, 15], runs m = 6 with k = 1, 6, 11, 16 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2328
// For each n in [9, 15] and k = 1, 6, 11, 16, sweeps m over [1, 6]; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2348
// For n = 16 and n = 24, runs m = 6 with k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2365
// For n = 16 and n = 24, runs m = 6 with k = 1, 6, 11, 16 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2383
// For n = 16 and n = 24, runs m = 6 with k = 1, 6, 11, 16 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2401
// For n = 16 and n = 24 and k = 1, 6, 11, 16, sweeps m over [1, 6]; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2421
// For k = 1, 6, 11, 16, sweeps n over [1, 8] and m over [1, 6] with cm_stride set to 11;
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2442
// Runs the m = 6, n = 8, k = 4 case with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2456
// Runs the m = 6, n = 8, k = 4 case with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2470
// Runs the m = 6, n = 8, k = 4 case with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2484 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2485
2486
2487 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the m = 6, n = 8, k = 8 case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
          xnn_init_f32_minmax_scalar_params);
}
2500
// Runs the m = 6, n = 8, k = 8 case with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
          xnn_init_f32_minmax_scalar_params);
}
2514
// Runs the m = 6, n = 8, k = 8 case with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .a_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
          xnn_init_f32_minmax_scalar_params);
}
2528
// Sweeps n over [1, 8] and m over [1, 6] with k fixed at 8; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2546
// Sweeps m over [1, 6] with n = 8 and k = 8; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_val).n(8).k(8)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
  }
}
2562
// Sweeps n over [1, 8] with m = 6 and k = 8; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
  }
}
2578
// Runs the m = 6, n = 8, k = 16 case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(16)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
          xnn_init_f32_minmax_scalar_params);
}
2591
// Runs the m = 6, n = 8, k = 16 case with a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(16)
    .a_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
          xnn_init_f32_minmax_scalar_params);
}
2605
// Sweeps n over [1, 8] and m over [1, 6] with k fixed at 16; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2623
// Runs the m = 6, n = 8 case for every k in [1, 15].
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
  }
}
2638
// Runs the m = 6, n = 8 case for every k in [1, 15] with a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
  }
}
2654
// For each k in [1, 15], sweeps n over [1, 8] and m over [1, 6]; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2674
// Runs the m = 6, n = 8 case for every k in [17, 31].
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
  }
}
2689
// Runs the m = 6, n = 8 case for every k in [17, 31] with a_stride set to 37.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(37)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
  }
}
2705
// For each k in [17, 31], sweeps n over [1, 8] and m over [1, 6]; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2725
// Runs the m = 6, n = 8 case for k = 24, 32, ..., 80.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
  }
}
2740
// Runs the m = 6, n = 8 case for k = 24, 32, ..., 80 with a_stride set to 83.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(83)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
  }
}
2756
// For k = 24, 32, ..., 80, sweeps n over [1, 8] and m over [1, 6]; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2776
// For each n in [9, 15], runs m = 6 with k = 1, 10, 19, 28, 37.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2793
// For each n in [9, 15], runs m = 6 with k = 1, 10, 19, 28, 37 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2811
// For each n in [9, 15], runs m = 6 with k = 1, 10, 19, 28, 37 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2829
// For each n in [9, 15] and k = 1, 10, 19, 28, 37, sweeps m over [1, 6];
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2849
// For n = 16 and n = 24, runs m = 6 with k = 1, 10, 19, 28, 37.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2866
// For n = 16 and n = 24, runs m = 6 with k = 1, 10, 19, 28, 37 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2884
// For n = 16 and n = 24, runs m = 6 with k = 1, 10, 19, 28, 37 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2902
// For n = 16 and n = 24 and k = 1, 10, 19, 28, 37, sweeps m over [1, 6];
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2922
// For k = 1, 10, 19, 28, 37, sweeps n over [1, 8] and m over [1, 6] with cm_stride set to 11;
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2943
// Runs the m = 6, n = 8, k = 8 case with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
          xnn_init_f32_minmax_scalar_params);
}
2957
// Runs the m = 6, n = 8, k = 8 case with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
          xnn_init_f32_minmax_scalar_params);
}
2971
// Runs the m = 6, n = 8, k = 8 case with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
          xnn_init_f32_minmax_scalar_params);
}
2985 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2986
2987
2988 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the m = 6, n = 8, k = 2 case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
3001
// Runs the m = 6, n = 8, k = 2 case with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
3015
// Runs the m = 6, n = 8, k = 2 case with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .a_stride(5)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
3029
// Sweeps n over [1, 8] and m over [1, 6] with k fixed at 2; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3047
// Sweeps m over [1, 6] with n = 8 and k = 2; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_val).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3063
// Sweeps n over [1, 8] with m = 6 and k = 2; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_val).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3079
// Runs the m = 6, n = 8 case for every k below 2 (the loop visits only k = 1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 2; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3094
// Runs the m = 6, n = 8 case for every k below 2 (only k = 1) with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 2; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3110
// For every k below 2 (only k = 1), sweeps n over [1, 8] and m over [1, 6];
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 2; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3130
// Runs the m = 6, n = 8 case for k in [3, 4) (the loop visits only k = 3).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 3; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3145
// Runs the m = 6, n = 8 case for k in [3, 4) (only k = 3) with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 3; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3161
// For k in [3, 4) (only k = 3), sweeps n over [1, 8] and m over [1, 6];
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 3; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3181
// Runs the m = 6, n = 8 case for k = 4, 6, ..., 20.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 4; k_val <= 20; k_val += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3196
// Runs the m = 6, n = 8 case for k = 4, 6, ..., 20 with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 4; k_val <= 20; k_val += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(23)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3212
// For k = 4, 6, ..., 20, sweeps n over [1, 8] and m over [1, 6]; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 4; k_val <= 20; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3232
// For each n in [9, 15], runs m = 6 with k = 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3249
// For each n in [9, 15], runs m = 6 with k = 1, 4, 7, 10 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 10; k_val += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3267
// ld64 6x8 kernel: m=6, n from 9 through 15, k in {1, 4, 7, 10}, with
// a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3285
// ld64 6x8 kernel: n from 9 through 15, k in {1, 4, 7, 10}, m from 1 through 6;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3305
// ld64 6x8 kernel: m=6, n in {16, 24}, k in {1, 4, 7, 10}.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3322
// ld64 6x8 kernel: m=6, n in {16, 24}, k in {1, 4, 7, 10}, with cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3340
// ld64 6x8 kernel: m=6, n in {16, 24}, k in {1, 4, 7, 10}, with a_stride
// set to 13.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3358
// ld64 6x8 kernel: n in {16, 24}, k in {1, 4, 7, 10}, m from 1 through 6;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 10; k_value += 3) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3378
// ld64 6x8 kernel: k in {1, 4, 7, 10}, n from 1 through 8, m from 1 through 6,
// with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 1; k_value <= 10; k_value += 3) {
    for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3399
// ld64 6x8 kernel: m=6, n=8, k=2, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
3413
// ld64 6x8 kernel: m=6, n=8, k=2, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
3427
// ld64 6x8 kernel: m=6, n=8, k=2, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
3441 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3442
3443
3444 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// ld128 6x8 kernel: m=6, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3457
// ld128 6x8 kernel: m=6, n=8, k=4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3471
// ld128 6x8 kernel: m=6, n=8, k=4, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3485
// ld128 6x8 kernel: k=4 for every (m, n) with n in 1..8 and m in 1..6;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
    for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_value).n(n_value).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3503
// ld128 6x8 kernel: n=8, k=4, m from 1 through 6, with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_value).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3519
// ld128 6x8 kernel: m=6, k=4, n from 1 through 8, with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_value).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3535
// ld128 6x8 kernel: m=6, n=8, k from 1 through 3.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 1; k_value <= 3; ++k_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3550
// ld128 6x8 kernel: m=6, n=8, k from 1 through 3, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 1; k_value <= 3; ++k_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3566
// ld128 6x8 kernel: k from 1 through 3, n from 1 through 8, m from 1 through 6;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 1; k_value <= 3; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3586
// ld128 6x8 kernel: m=6, n=8, k from 5 through 7.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 5; k_value <= 7; ++k_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3601
// ld128 6x8 kernel: m=6, n=8, k from 5 through 7, with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 5; k_value <= 7; ++k_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3617
// ld128 6x8 kernel: k from 5 through 7, n from 1 through 8, m from 1 through 6;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 5; k_value <= 7; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3637
// ld128 6x8 kernel: m=6, n=8, k in {8, 12, ..., 40} (step 4).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 8; k_value <= 40; k_value += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3652
// ld128 6x8 kernel: m=6, n=8, k in {8, 12, ..., 40} (step 4), with a_stride
// set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 8; k_value <= 40; k_value += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3668
// ld128 6x8 kernel: k in {8, 12, ..., 40} (step 4), n from 1 through 8, m from
// 1 through 6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 8; k_value <= 40; k_value += 4) {
    for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3688
// ld128 6x8 kernel: m=6, n from 9 through 15, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3705
// ld128 6x8 kernel: m=6, n from 9 through 15, k in {1, 6, 11, 16}, with
// cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3723
// ld128 6x8 kernel: m=6, n from 9 through 15, k in {1, 6, 11, 16}, with
// a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3741
// ld128 6x8 kernel: n from 9 through 15, k in {1, 6, 11, 16}, m from 1 through
// 6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3761
// ld128 6x8 kernel: m=6, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3778
// ld128 6x8 kernel: m=6, n in {16, 24}, k in {1, 6, 11, 16}, with cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3796
// ld128 6x8 kernel: m=6, n in {16, 24}, k in {1, 6, 11, 16}, with a_stride
// set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3814
// ld128 6x8 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m from 1 through 6;
// each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 20; k_value += 5) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3834
// ld128 6x8 kernel: k in {1, 6, 11, 16}, n from 1 through 8, m from 1 through
// 6, with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 1; k_value <= 20; k_value += 5) {
    for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3855
// ld128 6x8 kernel: m=6, n=8, k=4, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3869
// ld128 6x8 kernel: m=6, n=8, k=4, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3883
// ld128 6x8 kernel: m=6, n=8, k=4, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3897 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3898
3899
3900 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k=8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
3913
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k=8, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
3927
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k=8, with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .a_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
3941
// prfm_cortex_a75 6x8 kernel: k=8 for every (m, n) with n in 1..8 and m in
// 1..6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
    for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_value).n(n_value).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3959
// prfm_cortex_a75 6x8 kernel: n=8, k=8, m from 1 through 6, with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_value).n(8).k(8)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
3975
// prfm_cortex_a75 6x8 kernel: m=6, k=8, n from 1 through 8, with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_value).k(8)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
3991
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k=16.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(16)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
4004
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k=16, with a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(16)
    .a_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
4018
// prfm_cortex_a75 6x8 kernel: k=16 for every (m, n) with n in 1..8 and m in
// 1..6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
    for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_value).n(n_value).k(16)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4036
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k from 1 through 15.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 1; k_value <= 15; ++k_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
4051
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k from 1 through 15, with a_stride
// set to 19.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 1; k_value <= 15; ++k_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
4067
// prfm_cortex_a75 6x8 kernel: k from 1 through 15, n from 1 through 8, m from
// 1 through 6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 1; k_value <= 15; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4087
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k from 17 through 31.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 17; k_value <= 31; ++k_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
4102
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k from 17 through 31, with a_stride
// set to 37.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 17; k_value <= 31; ++k_value) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .a_stride(37)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
4118
// prfm_cortex_a75 6x8 kernel: k from 17 through 31, n from 1 through 8, m from
// 1 through 6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 17; k_value <= 31; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4138
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k in {24, 32, ..., 80} (step 8).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 24; k_value <= 80; k_value += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
4153
// prfm_cortex_a75 6x8 kernel: m=6, n=8, k in {24, 32, ..., 80} (step 8), with
// a_stride set to 83.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 24; k_value <= 80; k_value += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_value)
      .a_stride(83)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
4169
// prfm_cortex_a75 6x8 kernel: k in {24, 32, ..., 80} (step 8), n from 1 through
// 8, m from 1 through 6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_value = 24; k_value <= 80; k_value += 8) {
    for (uint32_t n_value = 1; n_value <= 8; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4189
// prfm_cortex_a75 6x8 kernel: m=6, n from 9 through 15, k in
// {1, 10, 19, 28, 37}.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4206
// prfm_cortex_a75 6x8 kernel: m=6, n from 9 through 15, k in
// {1, 10, 19, 28, 37}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4224
// prfm_cortex_a75 6x8 kernel: m=6, n from 9 through 15, k in
// {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4242
// prfm_cortex_a75 6x8 kernel: n from 9 through 15, k in {1, 10, 19, 28, 37},
// m from 1 through 6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 9; n_value <= 15; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4262
// prfm_cortex_a75 6x8 kernel: m=6, n in {16, 24}, k in {1, 10, 19, 28, 37}.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4279
// prfm_cortex_a75 6x8 kernel: m=6, n in {16, 24}, k in {1, 10, 19, 28, 37},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4297
// prfm_cortex_a75 6x8 kernel: m=6, n in {16, 24}, k in {1, 10, 19, 28, 37},
// with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_value).k(k_value)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4315
// prfm_cortex_a75 6x8 kernel: n in {16, 24}, k in {1, 10, 19, 28, 37}, m from
// 1 through 6; each configuration uses iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_value = 16; n_value <= 24; n_value += 8) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      for (uint32_t m_value = 1; m_value <= 6; ++m_value) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4335
// k = 1, 10, ..., 37; n from 1 to 8; m from 1 to 6; cm_stride set to 11, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4356
// Single 6x8 tile with k = 8 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
4370
// Single 6x8 tile with k = 8 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
4384
// Single 6x8 tile with k = 8 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
4398 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4399
4400
4401 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 tile with k = 2.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4414
// Single 1x8 tile with k = 2 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4428
// Single 1x8 tile with k = 2 and a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4442
// k = 2 with n from 1 to 8; the m loop covers only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(2)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4460
// k = 2, n = 8; the m loop covers only m = 1. One iteration.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4476
// k = 2, m = 1, n from 1 to 8. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4492
// Full 1x8 tile for every k below 2 (only k = 1).
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 2; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4507
// Full 1x8 tile for every k below 2 (only k = 1), with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 2; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .a_stride(5)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4523
// k below 2 (only k = 1), n from 1 to 8, m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 2; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4543
// Full 1x8 tile for k in [3, 4), i.e. only k = 3.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 3; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4558
// Full 1x8 tile for k in [3, 4), i.e. only k = 3, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 3; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4574
// k in [3, 4) (only k = 3), n from 1 to 8, m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 3; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4594
// Full 1x8 tile for even k = 4, 6, ..., 20.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4609
// Full 1x8 tile for even k = 4, 6, ..., 20, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4625
// Even k = 4, 6, ..., 20; n from 1 to 8; m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4645
// n from 9 to 15 and k = 1, 4, 7, 10 at m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4662
// n from 9 to 15 and k = 1, 4, 7, 10 at m = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4680
// n from 9 to 15 and k = 1, 4, 7, 10 at m = 1, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4698
// n from 9 to 15; k = 1, 4, 7, 10; m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4718
// n = 16, 24 and k = 1, 4, 7, 10 at m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4735
// n = 16, 24 and k = 1, 4, 7, 10 at m = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4753
// n = 16, 24 and k = 1, 4, 7, 10 at m = 1, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4771
// n = 16, 24; k = 1, 4, 7, 10; m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4791
// k = 1, 4, 7, 10; n from 1 to 8; m loop covering only m = 1; cm_stride set to 11, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 10; k_size += 3) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4812
// Single 1x8 tile with k = 2 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4826
// Single 1x8 tile with k = 2 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4840
// Single 1x8 tile with k = 2 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4854 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4855
4856
4857 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 tile with k = 2.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4870
// Single 1x8 tile with k = 2 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4884
// Single 1x8 tile with k = 2 and a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
}
4898
// k = 2 with n from 1 to 8; the m loop covers only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(2)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4916
// k = 2, n = 8; the m loop covers only m = 1. One iteration.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4932
// k = 2, m = 1, n from 1 to 8. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4948
// Full 1x8 tile for every k below 2 (only k = 1).
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size < 2; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4963
// Full 1x8 tile for every k below 2 (only k = 1), with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size < 2; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .a_stride(5)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
4979
// k below 2 (only k = 1), n from 1 to 8, m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size < 2; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4999
// Full 1x8 tile for k in [3, 4), i.e. only k = 3.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 3; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
5014
// Full 1x8 tile for k in [3, 4), i.e. only k = 3, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 3; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
5030
// k in [3, 4) (only k = 3), n from 1 to 8, m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 3; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5050
// Full 1x8 tile for even k = 4, 6, ..., 20.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
5065
// Full 1x8 tile for even k = 4, 6, ..., 20, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
5081
// Even k = 4, 6, ..., 20; n from 1 to 8; m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5101
// n from 9 to 15 and k = 1, 4, 7, 10 at m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5118
// n from 9 to 15 and k = 1, 4, 7, 10 at m = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5136
// n from 9 to 15 and k = 1, 4, 7, 10 at m = 1, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5154
// n from 9 to 15; k = 1, 4, 7, 10; m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5174
// n = 16, 24 and k = 1, 4, 7, 10 at m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5191
// n = 16, 24 and k = 1, 4, 7, 10 at m = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5209
// n = 16, 24 and k = 1, 4, 7, 10 at m = 1, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5227
// n = 16, 24; k = 1, 4, 7, 10; m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5247
// k = 1, 4, 7, 10; n from 1 to 8; m loop covering only m = 1; cm_stride set to 11, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 10; k_size += 3) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5268
// Single 1x8 tile with k = 2 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
}
5282
// Single 1x8 tile with k = 2 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
}
5296
// Single 1x8 tile with k = 2 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
}
5310 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5311
5312
5313 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 tile (sr = 4) with k = 4.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
5326
// Single 1x8 tile (sr = 4) with k = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
5340
// Single 1x8 tile (sr = 4) with k = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
}
5354
// k = 4 with n from 1 to 8; the m loop covers only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5372
// k = 4, n = 8; the m loop covers only m = 1. One iteration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(m_size).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
5388
// k = 4, m = 1, n from 1 to 8. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
5404
// Full 1x8 tile (sr = 4) for k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
5419
// Full 1x8 tile (sr = 4) for k = 1, 2, 3, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
5435
// k = 1, 2, 3; n from 1 to 8; m loop covering only m = 1. One iteration each.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5455
// Full 1x8 tile (sr = 4) for k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
5470
// Full 1x8 tile (sr = 4) for k = 5, 6, 7, with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_size)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
  }
}
5486
// For k = 5, 6, 7: every n in [1, 8] combined with every m in [1, 1],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5506
// m = 1, n = 8, with k taking the multiples of 4 from 8 through 40.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
5521
// k = 8, 12, ..., 40 at m = 1, n = 8, with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
5537
// For k = 8, 12, ..., 40: every n in [1, 8] combined with every m in [1, 1],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5557
// m = 1 with n in [9, 15]; for each n, k takes 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5574
// n in [9, 15] and k in {1, 6, 11, 16} at m = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5592
// n in [9, 15] and k in {1, 6, 11, 16} at m = 1, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5610
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 1]; one tester iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5630
// m = 1 with n in {16, 24}; for each n, k takes 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5647
// n in {16, 24} and k in {1, 6, 11, 16} at m = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5665
// n in {16, 24} and k in {1, 6, 11, 16} at m = 1, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5683
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 1]; one tester iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5703
// k in {1, 6, 11, 16}, n in [1, 8], m in [1, 1], all with cm_stride set to 11
// and one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5724
// Single 1x8 run at k = 4 with the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
5738
// Single 1x8 run at k = 4 with the tester's qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
5752
// Single 1x8 run at k = 4 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
5766 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5767
5768
5769 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x8 NEON dup-ld64 GEMMINC kernel: single run at m = 4, n = 8, k = 2.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5782
// Single run at m = 4, n = 8, k = 2 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5796
// Single run at m = 4, n = 8, k = 2 with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .a_stride(5)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5810
// k fixed at 2; every n in [1, 8] combined with every m in [1, 4],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5828
// n = 8 and k = 2 held fixed while m runs over [1, 4]; one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5844
// m = 4 and k = 2 held fixed while n runs over [1, 8]; one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n_dim).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5860
// m = 4, n = 8; the loop bounds [1, 2) yield the single value k = 1.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5875
// m = 4, n = 8, k = 1 (loop bounds [1, 2)), with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5891
// k = 1 (loop bounds [1, 2)); every n in [1, 8] combined with every m in [1, 4],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5911
// m = 4, n = 8; the loop bounds [3, 4) yield the single value k = 3.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5926
// m = 4, n = 8, k = 3 (loop bounds [3, 4)), with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5942
// k = 3 (loop bounds [3, 4)); every n in [1, 8] combined with every m in [1, 4],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5962
// m = 4, n = 8, with k taking the even values from 4 through 20.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5977
// k = 4, 6, ..., 20 at m = 4, n = 8, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(23)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5993
// For k = 4, 6, ..., 20: every n in [1, 8] combined with every m in [1, 4],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6013
// m = 4 with n in [9, 15]; for each n, k takes 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6030
// n in [9, 15] and k in {1, 4, 7, 10} at m = 4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6048
// n in [9, 15] and k in {1, 4, 7, 10} at m = 4, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6066
// n in [9, 15], k in {1, 4, 7, 10}, m in [1, 4]; one tester iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6086
// m = 4 with n in {16, 24}; for each n, k takes 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6103
// n in {16, 24} and k in {1, 4, 7, 10} at m = 4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6121
// n in {16, 24} and k in {1, 4, 7, 10} at m = 4, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6139
// n in {16, 24}, k in {1, 4, 7, 10}, m in [1, 4]; one tester iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6159
// k in {1, 4, 7, 10}, n in [1, 8], m in [1, 4], all with cm_stride set to 11
// and one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6180
// Single 4x8 run at k = 2 with the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
6194
// Single 4x8 run at k = 2 with the tester's qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
6208
// Single 4x8 run at k = 2 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
6222 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6223
6224
6225 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x8 NEON dup-ld128 GEMMINC kernel: single run at m = 4, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
6238
// Single run at m = 4, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
6252
// Single run at m = 4, n = 8, k = 4 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
6266
// k fixed at 4; every n in [1, 8] combined with every m in [1, 4],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6284
// n = 8 and k = 4 held fixed while m runs over [1, 4]; one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
6300
// m = 4 and k = 4 held fixed while n runs over [1, 8]; one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
6316
// m = 4, n = 8, with k stepping through 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
6331
// k = 1, 2, 3 at m = 4, n = 8, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
6347
// For k = 1, 2, 3: every n in [1, 8] combined with every m in [1, 4],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6367
// m = 4, n = 8, with k stepping through 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
6382
// k = 5, 6, 7 at m = 4, n = 8, with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
6398
// For k = 5, 6, 7: every n in [1, 8] combined with every m in [1, 4],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6418
// m = 4, n = 8, with k taking the multiples of 4 from 8 through 40.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
6433
// k = 8, 12, ..., 40 at m = 4, n = 8, with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
6449
// For k = 8, 12, ..., 40: every n in [1, 8] combined with every m in [1, 4],
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6469
// m = 4 with n in [9, 15]; for each n, k takes 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6486
// n in [9, 15] and k in {1, 6, 11, 16} at m = 4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6504
// n in [9, 15] and k in {1, 6, 11, 16} at m = 4, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6522
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 4]; one tester iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6542
// m = 4 with n in {16, 24}; for each n, k takes 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6559
// n in {16, 24} and k in {1, 6, 11, 16} at m = 4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6577
// n in {16, 24} and k in {1, 6, 11, 16} at m = 4, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6595
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 4]; one tester iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6615
// k in {1, 6, 11, 16}, n in [1, 8], m in [1, 4], all with cm_stride set to 11
// and one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6636
// Single 4x8 run at k = 4 with the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
6650
// Single run at m=4, n=8, k=4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
6664
// Single run at m=4, n=8, k=4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
6678 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6679
6680
6681 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m=4, n=8, k=2 with no stride or clamp overrides.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
6694
// Single run at m=4, n=8, k=2 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
6708
// Single run at m=4, n=8, k=2 with a_stride(5).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2).a_stride(5);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
6722
// Runs every (n, m) pair with n in [1, 8] and m in [1, 4] at k=2, each with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m).n(n).k(2).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6740
// Varies m over [1, 4] at n=8, k=2, each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m).n(8).k(2).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6756
// Varies n over [1, 8] at m=4, k=2, each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n).k(2).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6772
// Covers every k below 2 (the loop admits only k=1) at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6787
// Covers every k below 2 (the loop admits only k=1) at m=4, n=8 with a_stride(5).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(5);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6803
// For every k below 2 (only k=1), runs each n in [1, 8] and m in [1, 4] with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6823
// Covers k from 3 up to, but excluding, 4 (only k=3) at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6838
// Covers k from 3 up to, but excluding, 4 (only k=3) at m=4, n=8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6854
// For k from 3 up to, but excluding, 4 (only k=3), runs each n in [1, 8] and m in [1, 4] with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6874
// Covers the even k values 4, 6, ..., 20 at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6889
// Covers the even k values 4, 6, ..., 20 at m=4, n=8 with a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(23);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6905
// For the even k values 4, 6, ..., 20, runs each n in [1, 8] and m in [1, 4] with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6925
// Sweeps n over [9, 15] and k over {1, 4, 7, 10} at m=4.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6942
// Sweeps n over [9, 15] and k over {1, 4, 7, 10} at m=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6960
// Sweeps n over [9, 15] and k over {1, 4, 7, 10} at m=4 with a_stride(13).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k).a_stride(13);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6978
// Sweeps n over [9, 15], k over {1, 4, 7, 10} and m over [1, 4], each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6998
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=4.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7015
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7033
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=4 with a_stride(13).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k).a_stride(13);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7051
// Sweeps n over {16, 24}, k over {1, 4, 7, 10} and m over [1, 4], each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7071
// Sweeps k over {1, 4, 7, 10}, n over [1, 8] and m over [1, 4] with cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7092
// Single run at m=4, n=8, k=2 with qmin(128).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
7106
// Single run at m=4, n=8, k=2 with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
7120
// Single run at m=4, n=8, k=2 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
7134 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7135
7136
7137 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m=4, n=8, k=4 with no stride or clamp overrides.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7150
// Single run at m=4, n=8, k=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7164
// Single run at m=4, n=8, k=4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7178
// Runs every (n, m) pair with n in [1, 8] and m in [1, 4] at k=4, each with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m).n(n).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7196
// Varies m over [1, 4] at n=8, k=4, each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7212
// Varies n over [1, 8] at m=4, k=4, each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7228
// Covers k in {1, 2, 3} at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7243
// Covers k in {1, 2, 3} at m=4, n=8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7259
// For k in {1, 2, 3}, runs each n in [1, 8] and m in [1, 4] with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7279
// Covers k in {5, 6, 7} at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7294
// Covers k in {5, 6, 7} at m=4, n=8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7310
// For k in {5, 6, 7}, runs each n in [1, 8] and m in [1, 4] with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7330
// Covers k = 8, 12, ..., 40 at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 8; k <= 40; k += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7345
// Covers k = 8, 12, ..., 40 at m=4, n=8 with a_stride(43).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 8; k <= 40; k += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7361
// For k = 8, 12, ..., 40, runs each n in [1, 8] and m in [1, 4] with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7381
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} at m=4.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7398
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} at m=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7416
// Sweeps n over [9, 15] and k over {1, 6, 11, 16} at m=4 with a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7434
// Sweeps n over [9, 15], k over {1, 6, 11, 16} and m over [1, 4], each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7454
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m=4.
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7471
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7489
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m=4 with a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n).k(k).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7507
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over [1, 4], each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7527
// Sweeps k over {1, 6, 11, 16}, n over [1, 8] and m over [1, 4] with cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7548
// Single run at m=4, n=8, k=4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7562
// Single run at m=4, n=8, k=4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7576
// Single run at m=4, n=8, k=4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7590 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7591
7592
7593 #if XNN_ARCH_ARM64
// Single run at m=4, n=8, k=4 with no stride or clamp overrides.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7606
// Single run at m=4, n=8, k=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7620
// Single run at m=4, n=8, k=4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
7634
// Runs every (n, m) pair with n in [1, 8] and m in [1, 4] at k=4, each with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m).n(n).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7652
// Varies m over [1, 4] at n=8, k=4, each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 4; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7668
// Varies n over [1, 8] at m=4, k=4, each run with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7684
// Covers k in {1, 2, 3} at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7699
// Covers k in {1, 2, 3} at m=4, n=8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7715
// For k in {1, 2, 3}, runs each n in [1, 8] and m in [1, 4] with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7735
// Covers k in {5, 6, 7} at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7750
// Covers k in {5, 6, 7} at m=4, n=8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7766
// For k in {5, 6, 7}, runs each n in [1, 8] and m in [1, 4] with iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7786
// Covers k = 8, 12, ..., 40 at m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 8; k <= 40; k += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7801
// Covers k = 8, 12, ..., 40 at m=4, n=8 with a_stride(43).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 8; k <= 40; k += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7817
// Every m in 1..4 and n in 1..8 for k = 8, 12, ..., 40; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7837
// n in 9..15 with m=4, for k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7854
// n in 9..15 with m=4, for k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7872
// n in 9..15 with m=4, for k = 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7890
// n in 9..15, k = 1, 6, 11, 16 and every m in 1..4; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7910
// n = 16 and 24 with m=4, for k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7927
// n = 16 and 24 with m=4, for k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7945
// n = 16 and 24 with m=4, for k = 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7963
// n = 16 and 24, k = 1, 6, 11, 16 and every m in 1..4; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7983
// Every m in 1..4 and n in 1..8 for k = 1, 6, 11, 16, with cm_stride set to 11;
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8004
// Full tile (m=4, n=8, k=4) with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8018
// Full tile (m=4, n=8, k=4) with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8032
// Full tile (m=4, n=8, k=4) with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8046 #endif // XNN_ARCH_ARM64
8047
8048
8049 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full tile (m=5, n=8) with k=2.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
8062
// Full tile (m=5, n=8, k=2) with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
8076
// Full tile (m=5, n=8, k=2) with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
8090
// Every m in 1..5 and n in 1..8 with k=2; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(2)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8108
// Every m in 1..5 with n=8 and k=2; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
8124
// Every n in 1..8 with m=5 and k=2; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
8140
// Full tile (m=5, n=8) for every k below 2; the range contains only k=1.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 1; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
8155
// Full tile (m=5, n=8) for every k below 2 (only k=1), with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 1; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .a_stride(5)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
8171
// Every m in 1..5 and n in 1..8 for every k below 2 (only k=1);
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 1; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8191
// Full tile (m=5, n=8) for k from 3 up to but excluding 4; the range contains only k=3.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc <= 3; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
8206
// Full tile (m=5, n=8) for k from 3 up to but excluding 4 (only k=3), with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc <= 3; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
8222
// Every m in 1..5 and n in 1..8 for k from 3 up to but excluding 4 (only k=3);
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc <= 3; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8242
// Full tile (m=5, n=8) with k = 4, 6, ..., 20 (steps of 2).
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
8257
// Full tile (m=5, n=8) with k = 4, 6, ..., 20 (steps of 2) and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
8273
// Every m in 1..5 and n in 1..8 for k = 4, 6, ..., 20; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8293
// n in 9..15 with m=5, for k = 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8310
// n in 9..15 with m=5, for k = 1, 4, 7, 10, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8328
// n in 9..15 with m=5, for k = 1, 4, 7, 10, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8346
// n in 9..15, k = 1, 4, 7, 10 and every m in 1..5; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8366
// n = 16 and 24 with m=5, for k = 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8383
// n = 16 and 24 with m=5, for k = 1, 4, 7, 10, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8401
// n = 16 and 24 with m=5, for k = 1, 4, 7, 10, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8419
// n = 16 and 24, k = 1, 4, 7, 10 and every m in 1..5; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8439
// Every m in 1..5 and n in 1..8 for k = 1, 4, 7, 10, with cm_stride set to 11;
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8460
// Full tile (m=5, n=8, k=2) with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
8474
// Full tile (m=5, n=8, k=2) with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
8488
// Full tile (m=5, n=8, k=2) with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
}
8502 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8503
8504
8505 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full tile (m=6, n=8) with k=4.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8518
// Full tile (m=6, n=8, k=4) with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8532
// Full tile (m=6, n=8, k=4) with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8546
// Every m in 1..6 and n in 1..8 with k=4; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8564
// Every m in 1..6 with n=8 and k=4; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
8580
// Every n in 1..8 with m=6 and k=4; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
8596
// Full tile (m=6, n=8) with k stepping through 1, 2 and 3.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 3; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
8611
// Full tile (m=6, n=8) with k stepping through 1, 2 and 3 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 3; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
8627
// Every m in 1..6 and n in 1..8 for k in 1..3; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 3; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8647
// Full tile (m=6, n=8) with k stepping through 5, 6 and 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 5; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
8662
// Full tile (m=6, n=8) with k stepping through 5, 6 and 7 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 5; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
8678
// Every m in 1..6 and n in 1..8 for k in 5..7; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 5; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8698
// Full tile (m=6, n=8) with k = 8, 12, ..., 40 (steps of 4).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
8713
// Full tile (m=6, n=8) with k = 8, 12, ..., 40 (steps of 4) and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
8729
// Every m in 1..6 and n in 1..8 for k = 8, 12, ..., 40; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8749
// n in 9..15 with m=6, for k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8766
// n in 9..15 with m=6, for k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8784
// n in 9..15 with m=6, for k = 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8802
// n in 9..15, k = 1, 6, 11, 16 and every m in 1..6; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8822
// n = 16 and 24 with m=6, for k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8839
// n = 16 and 24 with m=6, for k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8857
// n = 16 and 24 with m=6, for k = 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
8875
// n = 16 and 24, k = 1, 6, 11, 16 and every m in 1..6; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8895
// Every m in 1..6 and n in 1..8 for k = 1, 6, 11, 16, with cm_stride set to 11;
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8916
// Full tile (m=6, n=8, k=4) with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8930
// Full tile (m=6, n=8, k=4) with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8944
// Full tile (m=6, n=8, k=4) with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128,
            xnn_init_f32_minmax_scalar_params);
}
8958 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8959
8960
8961 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m=6, n=8, k=2 run of the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
}
8974
// Single m=6, n=8, k=2 run of the 6x8 neonfma_dup_ld64 kernel with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
}
8988
// Single m=6, n=8, k=2 run of the 6x8 neonfma_dup_ld64 kernel with a_stride(5).
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2).a_stride(5);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
}
9002
// Fixes k=2 and sweeps n in 1..8 and m in 1..6, one iteration per shape,
// against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 6; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(mm).n(nn).k(2).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9020
// Fixes n=8, k=2 and sweeps m in 1..6, one iteration per shape,
// against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 1; mm <= 6; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(mm).n(8).k(2).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
  }
}
9036
// Fixes m=6, k=2 and sweeps n in 1..8, one iteration per shape,
// against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(nn).k(2).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
  }
}
9052
// Runs m=6, n=8 for every k below 2 (the loop visits only k=1)
// against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 2; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
  }
}
9067
// Runs m=6, n=8 for every k below 2 (the loop visits only k=1) with a_stride(5)
// against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 2; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk).a_stride(5);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
  }
}
9083
// For every k below 2 (only k=1), sweeps n in 1..8 and m in 1..6 with one
// iteration per shape against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 2; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9103
// Runs m=6, n=8 for k from 3 up to (not including) 4, i.e. only k=3,
// against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 3; kk < 4; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
  }
}
9118
// Runs m=6, n=8 for k from 3 up to (not including) 4, i.e. only k=3,
// with a_stride(7) against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 3; kk < 4; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
  }
}
9134
// For k from 3 up to (not including) 4, sweeps n in 1..8 and m in 1..6 with
// one iteration per shape against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 3; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9154
// Runs m=6, n=8 for even k from 4 through 20 against the 6x8 neonfma_dup_ld64
// kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 4; kk <= 20; kk += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
  }
}
9169
// Runs m=6, n=8 for even k from 4 through 20 with a_stride(23) against the
// 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 4; kk <= 20; kk += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk).a_stride(23);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                xnn_init_f32_minmax_scalar_params);
  }
}
9185
// For even k from 4 through 20, sweeps n in 1..8 and m in 1..6 with one
// iteration per shape against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 4; kk <= 20; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9205
// Sweeps n in 9..15 and k over 1, 4, 7, 10 with m=6 against the 6x8
// neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9222
// Sweeps n in 9..15 and k over 1, 4, 7, 10 with m=6 and cn_stride(11) against
// the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9240
// Sweeps n in 9..15 and k over 1, 4, 7, 10 with m=6 and a_stride(13) against
// the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk).a_stride(13);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9258
// Sweeps n in 9..15, k over 1, 4, 7, 10 and m in 1..6 with one iteration per
// shape against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9278
// Sweeps n over 16 and 24 and k over 1, 4, 7, 10 with m=6 against the 6x8
// neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9295
// Sweeps n over 16 and 24 and k over 1, 4, 7, 10 with m=6 and cn_stride(11)
// against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9313
// Sweeps n over 16 and 24 and k over 1, 4, 7, 10 with m=6 and a_stride(13)
// against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk).a_stride(13);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9331
// Sweeps n over 16 and 24, k over 1, 4, 7, 10 and m in 1..6 with one iteration
// per shape against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9351
// Sweeps k over 1, 4, 7, 10 with n in 1..8 and m in 1..6, using cm_stride(11)
// and one iteration per shape, against the 6x8 neonfma_dup_ld64 kernel.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk <= 10; kk += 3) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9372
// Single m=6, n=8, k=2 run of the 6x8 neonfma_dup_ld64 kernel with qmin(128).
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
}
9386
// Single m=6, n=8, k=2 run of the 6x8 neonfma_dup_ld64 kernel with qmax(128).
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
}
9400
// Single m=6, n=8, k=2 run of the 6x8 neonfma_dup_ld64 kernel with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64,
              xnn_init_f32_minmax_scalar_params);
}
9414 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9415
9416
9417 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m=6, n=8, k=4 run of the 6x8s4 neon kernel (sr set to 4).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
}
9430
// Single m=6, n=8, k=4 run of the 6x8s4 neon kernel with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
}
9444
// Single m=6, n=8, k=4 run of the 6x8s4 neon kernel with a_stride(7).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
}
9458
// Fixes k=4 and sweeps n in 1..8 and m in 1..6, one iteration per shape,
// against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 6; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(mm).n(nn).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9476
// Fixes n=8, k=4 and sweeps m in 1..6, one iteration per shape,
// against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm <= 6; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(mm).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
  }
}
9492
// Fixes m=6, k=4 and sweeps n in 1..8, one iteration per shape,
// against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(nn).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
  }
}
9508
// Runs m=6, n=8 for k in 1..3 against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 4; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
  }
}
9523
// Runs m=6, n=8 for k in 1..3 with a_stride(7) against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 4; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
  }
}
9539
// Sweeps k in 1..3, n in 1..8 and m in 1..6 with one iteration per shape
// against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9559
// Runs m=6, n=8 for k in 5..7 against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 5; kk < 8; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
  }
}
9574
// Runs m=6, n=8 for k in 5..7 with a_stride(11) against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 5; kk < 8; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
  }
}
9590
// Sweeps k in 5..7, n in 1..8 and m in 1..6 with one iteration per shape
// against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9610
// Runs m=6, n=8 for k from 8 through 40 in steps of 4 against the 6x8s4 neon
// kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 8; kk <= 40; kk += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
  }
}
9625
// Runs m=6, n=8 for k from 8 through 40 in steps of 4 with a_stride(43)
// against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 8; kk <= 40; kk += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
  }
}
9641
// For k from 8 through 40 in steps of 4, sweeps n in 1..8 and m in 1..6 with
// one iteration per shape against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9661
// Sweeps n in 9..15 and k over 1, 6, 11, 16 with m=6 against the 6x8s4 neon
// kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(nn).k(kk);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9678
// Sweeps n in 9..15 and k over 1, 6, 11, 16 with m=6 and cn_stride(11) against
// the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(nn).k(kk).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9696
// Sweeps n in 9..15 and k over 1, 6, 11, 16 with m=6 and a_stride(23) against
// the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(nn).k(kk).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9714
// Sweeps n in 9..15, k over 1, 6, 11, 16 and m in 1..6 with one iteration per
// shape against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9734
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=6 against the 6x8s4
// neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(nn).k(kk);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9751
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=6 and cn_stride(11)
// against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(nn).k(kk).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9769
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=6 and a_stride(23)
// against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(nn).k(kk).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9787
// Sweeps n over 16 and 24, k over 1, 6, 11, 16 and m in 1..6 with one
// iteration per shape against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9807
// Sweeps k over 1, 6, 11, 16 with n in 1..8 and m in 1..6, using cm_stride(11)
// and one iteration per shape, against the 6x8s4 neon kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9828
// Single m=6, n=8, k=4 run of the 6x8s4 neon kernel with qmin(128).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
}
9842
// Single m=6, n=8, k=4 run of the 6x8s4 neon kernel with qmax(128).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
}
9856
// Single m=6, n=8, k=4 run of the 6x8s4 neon kernel with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
}
9870 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9871
9872
9873 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m=6, n=8, k=4 run of the 6x8s4 neonfma kernel (sr set to 4).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
}
9886
// Single m=6, n=8, k=4 run of the 6x8s4 neonfma kernel with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
}
9900
// Single m=6, n=8, k=4 run of the 6x8s4 neonfma kernel with a_stride(7).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
}
9914
// Fixes k=4 and sweeps n in 1..8 and m in 1..6, one iteration per shape,
// against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 6; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(mm).n(nn).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
9932
// Fixes n=8, k=4 and sweeps m in 1..6, one iteration per shape,
// against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 1; mm <= 6; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(mm).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
  }
}
9948
// Fixes m=6, k=4 and sweeps n in 1..8, one iteration per shape,
// against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(nn).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
  }
}
9964
// Runs m=6, n=8 for k in 1..3 against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 4; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
  }
}
9979
// Runs m=6, n=8 for k in 1..3 with a_stride(7) against the 6x8s4 neonfma
// kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 4; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
  }
}
9995
// Sweeps k in 1..3, n in 1..8 and m in 1..6 with one iteration per shape
// against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10015
// Runs m=6, n=8 for k in 5..7 against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 5; kk < 8; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
  }
}
10030
// Runs m=6, n=8 for k in 5..7 with a_stride(11) against the 6x8s4 neonfma
// kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 5; kk < 8; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
  }
}
10046
// Sweeps k in 5..7, n in 1..8 and m in 1..6 with one iteration per shape
// against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10066
// Runs m=6, n=8 for k from 8 through 40 in steps of 4 against the 6x8s4
// neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 8; kk <= 40; kk += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
  }
}
10081
// Runs m=6, n=8 for k from 8 through 40 in steps of 4 with a_stride(43)
// against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 8; kk <= 40; kk += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(kk).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
  }
}
10097
// For k from 8 through 40 in steps of 4, sweeps n in 1..8 and m in 1..6 with
// one iteration per shape against the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                    xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10117
// Sweeps n in 9..15 and k over 1, 6, 11, 16 with m=6 against the 6x8s4 neonfma
// kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(nn).k(kk);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
10134
// Sweeps n in 9..15 and k over 1, 6, 11, 16 with m=6 and cn_stride(11) against
// the 6x8s4 neonfma kernel.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(nn).k(kk).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                  xnn_init_f32_minmax_scalar_params);
    }
  }
}
10152
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=6 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n_val).k(k_val)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10170
// For n in 9..15 and k in 1, 6, 11, 16, tries every m in 1..6 with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10190
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=6.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10207
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=6 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10225
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=6 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n_val).k(k_val)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10243
// For n in {16, 24} and k in 1, 6, 11, 16, tries every m in 1..6 with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10263
// For k in 1, 6, 11, 16, tries every (m, n) with 1 <= m <= 6 and 1 <= n <= 8,
// with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10284
// Single m=6, n=8, k=4 run with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
10298
// Single m=6, n=8, k=4 run with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
10312
// Single m=6, n=8, k=4 run with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
10326 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10327
10328
10329 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 8x8s4 NEON kernel with m=8, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
10342
// Single m=8, n=8, k=4 run with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
10356
// Single m=8, n=8, k=4 run with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
10370
// With k=4, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8, using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(m_val).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10388
// With n=8 and k=4, tries every m in 1..8, using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(m_val).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
10404
// With m=8 and k=4, tries every n in 1..8, using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(n_val).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
10420
// Runs m=8, n=8 for k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 3; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
10435
// Runs m=8, n=8 for k = 1, 2, 3 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 3; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
10451
// For k = 1, 2, 3, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 3; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10471
// Runs m=8, n=8 for k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 5; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
10486
// Runs m=8, n=8 for k = 5, 6, 7 with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 5; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
10502
// For k = 5, 6, 7, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 5; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10522
// Runs m=8, n=8 for k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
10537
// Runs m=8, n=8 for k = 8, 12, ..., 40 with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
10553
// For k = 8, 12, ..., 40, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10573
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=8.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10590
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=8 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10608
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=8 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10626
// For n in 9..15 and k in 1, 6, 11, 16, tries every m in 1..8 with iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10646
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=8.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10663
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=8 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10681
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=8 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10699
// For n in {16, 24} and k in 1, 6, 11, 16, tries every m in 1..8 with iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10719
// For k in 1, 6, 11, 16, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8,
// with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10740
// Single m=8, n=8, k=4 run with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
10754
// Single m=8, n=8, k=4 run with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
10768
// Single m=8, n=8, k=4 run with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
10782 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10783
10784
10785 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 8x8s4 NEON FMA kernel with m=8, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
10798
// Single m=8, n=8, k=4 run with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
10812
// Single m=8, n=8, k=4 run with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
10826
// With k=4, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8, using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(m_val).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
10844
// With n=8 and k=4, tries every m in 1..8, using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(m_val).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
10860
// With m=8 and k=4, tries every n in 1..8, using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(n_val).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
10876
// Runs m=8, n=8 for k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 3; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
10891
// Runs m=8, n=8 for k = 1, 2, 3 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 3; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
10907
// For k = 1, 2, 3, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 3; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10927
// Runs m=8, n=8 for k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 5; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
10942
// Runs m=8, n=8 for k = 5, 6, 7 with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 5; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
10958
// For k = 5, 6, 7, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 5; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10978
// Runs m=8, n=8 for k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
10993
// Runs m=8, n=8 for k = 8, 12, ..., 40 with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
11009
// For k = 8, 12, ..., 40, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11029
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=8.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
11046
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=8 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
11064
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=8 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
11082
// For n in 9..15 and k in 1, 6, 11, 16, tries every m in 1..8 with iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11102
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=8.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
11119
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=8 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
11137
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=8 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
11155
// For n in {16, 24} and k in 1, 6, 11, 16, tries every m in 1..8 with iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11175
// For k in 1, 6, 11, 16, tries every (m, n) with 1 <= m <= 8 and 1 <= n <= 8,
// with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11196
// Single m=8, n=8, k=4 run with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
11210
// Single m=8, n=8, k=4 run with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
11224
// Single m=8, n=8, k=4 run with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
11238 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11239
11240
11241 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 1x8 SSE2 dup kernel with m=1, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup,
          xnn_init_f32_minmax_sse_params);
}
11254
// Single m=1, n=8, k=4 run with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup,
          xnn_init_f32_minmax_sse_params);
}
11268
// Single m=1, n=8, k=4 run with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup,
          xnn_init_f32_minmax_sse_params);
}
11282
// With k=4, tries every n in 1..8; the m loop covers only m=1. Uses iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup,
              xnn_init_f32_minmax_sse_params);
    }
  }
}
11300
// Sweeps m over [1, 1] with n=8, k=4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
11316
// Sweeps n over [1, 8] with m=1, k=4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
11332
// Sweeps k over [1, 4) with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
11347
// Sweeps k over [1, 4) with m=1, n=8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
11363
// Sweeps k over [1, 4), n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11383
// Sweeps k over [5, 8) with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
11398
// Sweeps k over [5, 8) with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
11414
// Sweeps k over [5, 8), n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11434
// Sweeps k over 8, 12, ..., 40 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
11449
// Sweeps k over 8, 12, ..., 40 with m=1, n=8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
11465
// Sweeps k over 8, 12, ..., 40, n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11485
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=1.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
11502
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
11520
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
11538
// Sweeps n over [9, 16), k over 1, 6, 11, 16 and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11558
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=1.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
11575
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
11593
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
11611
// Sweeps n over 16, 24, k over 1, 6, 11, 16 and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11631
// Sweeps k over 1, 6, 11, 16, n over [1, 8] and m over [1, 1] with cm_stride
// set to 11, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11652
// Single run with m=1, n=8, k=4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
11666
// Single run with m=1, n=8, k=4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
11680
// Single run with m=1, n=8, k=4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
11694 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11695
11696
11697 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=1, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
}
11710
// Single run with m=1, n=8, k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
}
11724
// Single run with m=1, n=8, k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
}
11738
// Sweeps n over [1, 8] and m over [1, 1] at k=4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    // The m range is 1..1, so this inner loop body executes once per n.
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
11756
// Sweeps m over [1, 1] with n=8, k=4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
11772
// Sweeps n over [1, 8] with m=1, k=4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
11788
// Sweeps k over [1, 4) with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
11803
// Sweeps k over [1, 4) with m=1, n=8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(k_size)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
11819
// Sweeps k over [1, 4), n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11839
// Sweeps k over [5, 8) with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
11854
// Sweeps k over [5, 8) with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(k_size)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
11870
// Sweeps k over [5, 8), n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11890
// Sweeps k over 8, 12, ..., 40 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
11905
// Sweeps k over 8, 12, ..., 40 with m=1, n=8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(k_size)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
11921
// Sweeps k over 8, 12, ..., 40, n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
11941
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=1.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
11958
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
11976
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_size).k(k_size)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
11994
// Sweeps n over [9, 16), k over 1, 6, 11, 16 and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12014
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=1.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
12031
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
12049
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_size).k(k_size)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
12067
// Sweeps n over 16, 24, k over 1, 6, 11, 16 and m over [1, 1], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12087
// Sweeps k over 1, 6, 11, 16, n over [1, 8] and m over [1, 1] with cm_stride
// set to 11, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12108
// Single run with m=1, n=8, k=4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
}
12122
// Single run with m=1, n=8, k=4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
}
12136
// Single run with m=1, n=8, k=4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
}
12150 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12151
12152
12153 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=3, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12166
// Single run with m=3, n=8, k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12180
// Single run with m=3, n=8, k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12194
// Sweeps n over [1, 8] and m over [1, 3] at k=4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12212
// Sweeps m over [1, 3] with n=8, k=4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12228
// Sweeps n over [1, 8] with m=3, k=4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12244
// Sweeps k over [1, 4) with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12259
// Sweeps k over [1, 4) with m=3, n=8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12275
// Sweeps k over [1, 4), n over [1, 8] and m over [1, 3], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12295
// Sweeps k over [5, 8) with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12310
// Sweeps k over [5, 8) with m=3, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12326
// Sweeps k over [5, 8), n over [1, 8] and m over [1, 3], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12346
// Sweeps k over 8, 12, ..., 40 with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12361
// Sweeps k over 8, 12, ..., 40 with m=3, n=8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12377
// Sweeps k over 8, 12, ..., 40, n over [1, 8] and m over [1, 3], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12397
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=3.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12414
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=3 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12432
// Sweeps n over [9, 16) and k over 1, 6, 11, 16 with m=3 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12450
// Sweeps n over [9, 16), k over 1, 6, 11, 16 and m over [1, 3], one iteration per shape.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12470
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m=3.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12487
// 3x8 SSE-dup kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 3, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12505
// 3x8 SSE-dup kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 3, a_stride = 23.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12523
// 3x8 SSE-dup kernel: n in {16, 24}, k in {1, 6, 11, 16}, every m in [1, 3];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12543
// 3x8 SSE-dup kernel: k in {1, 6, 11, 16}, every n in [1, 8] and m in [1, 3],
// cm_stride = 11; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12564
// 3x8 SSE-dup kernel: single 3x8 tile with k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12578
// 3x8 SSE-dup kernel: single 3x8 tile with k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12592
// 3x8 SSE-dup kernel: single 3x8 tile with k = 4 and cm_stride = 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12606 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12607
12608
12609 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x8 SSE-load1 kernel: single 3x8 tile with k = 1.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12622
// 3x8 SSE-load1 kernel: single 3x8 tile with k = 1 and cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12636
// 3x8 SSE-load1 kernel: single 3x8 tile with k = 1 and a_stride = 3.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12650
// 3x8 SSE-load1 kernel: k = 1, every n in [1, 8] and m in [1, 3];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12668
// 3x8 SSE-load1 kernel: k = 1, n = 8, every m in [1, 3]; one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
12684
// 3x8 SSE-load1 kernel: k = 1, m = 3, every n in [1, 8]; one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
12700
// 3x8 SSE-load1 kernel: full 3x8 tile, every k in [2, 9].
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
12715
// 3x8 SSE-load1 kernel: full 3x8 tile, every k in [2, 9], a_stride = 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
12731
// 3x8 SSE-load1 kernel: every k in [2, 9], n in [1, 8] and m in [1, 3];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_gt_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12751
// 3x8 SSE-load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 3.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12768
// 3x8 SSE-load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 3, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12786
// 3x8 SSE-load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 3, a_stride = 7.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12804
// 3x8 SSE-load1 kernel: n in [9, 15], k in {1, 3, 5}, every m in [1, 3];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12824
// 3x8 SSE-load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 3.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12841
// 3x8 SSE-load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 3, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12859
// 3x8 SSE-load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 3, a_stride = 7.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12877
// 3x8 SSE-load1 kernel: n in {16, 24}, k in {1, 3, 5}, every m in [1, 3];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12897
// 3x8 SSE-load1 kernel: k in {1, 3, 5}, every n in [1, 8] and m in [1, 3],
// cm_stride = 11; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12918
// 3x8 SSE-load1 kernel: single 3x8 tile with k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12932
// 3x8 SSE-load1 kernel: single 3x8 tile with k = 1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12946
// 3x8 SSE-load1 kernel: single 3x8 tile with k = 1 and cm_stride = 11.
TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12960 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12961
12962
12963 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x8 SSE-load1 kernel: single 4x8 tile with k = 1.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12976
// 4x8 SSE-load1 kernel: single 4x8 tile with k = 1 and cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12990
// 4x8 SSE-load1 kernel: single 4x8 tile with k = 1 and a_stride = 3.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
13004
// 4x8 SSE-load1 kernel: k = 1, every n in [1, 8] and m in [1, 4];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
13022
// 4x8 SSE-load1 kernel: k = 1, n = 8, every m in [1, 4]; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
13038
// 4x8 SSE-load1 kernel: k = 1, m = 4, every n in [1, 8]; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
13054
// 4x8 SSE-load1 kernel: full 4x8 tile, every k in [2, 9].
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
13069
// 4x8 SSE-load1 kernel: full 4x8 tile, every k in [2, 9], a_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
13085
// 4x8 SSE-load1 kernel: every k in [2, 9], n in [1, 8] and m in [1, 4];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_gt_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13105
// 4x8 SSE-load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
13122
// 4x8 SSE-load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 4, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
13140
// 4x8 SSE-load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 4, a_stride = 7.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
13158
// 4x8 SSE-load1 kernel: n in [9, 15], k in {1, 3, 5}, every m in [1, 4];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13178
// 4x8 SSE-load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
13195
// 4x8 SSE-load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 4, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
13213
// 4x8 SSE-load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 4, a_stride = 7.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
13231
// 4x8 SSE-load1 kernel: n in {16, 24}, k in {1, 3, 5}, every m in [1, 4];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13251
// 4x8 SSE-load1 kernel: k in {1, 3, 5}, every n in [1, 8] and m in [1, 4],
// cm_stride = 11; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13272
// 4x8 SSE-load1 kernel: single 4x8 tile with k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
13286
// 4x8 SSE-load1 kernel: single 4x8 tile with k = 1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
13300
// 4x8 SSE-load1 kernel: single 4x8 tile with k = 1 and cm_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
13314 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13315
13316
13317 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x8s4 SSE kernel (sr = 4): single 4x8 tile with k = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13330
// 4x8s4 SSE kernel (sr = 4): single 4x8 tile with k = 4 and cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13344
// 4x8s4 SSE kernel (sr = 4): single 4x8 tile with k = 4 and a_stride = 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13358
// 4x8s4 SSE kernel (sr = 4): k = 4, every n in [1, 8] and m in [1, 4];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13376
// 4x8s4 SSE kernel (sr = 4): k = 4, n = 8, every m in [1, 4]; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13392
// 4x8s4 SSE kernel (sr = 4): k = 4, m = 4, every n in [1, 8]; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13408
// 4x8s4 SSE kernel (sr = 4): full 4x8 tile, every k in [1, 3].
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13423
// 4x8s4 SSE kernel (sr = 4): full 4x8 tile, every k in [1, 3], a_stride = 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13439
// 4x8s4 SSE kernel (sr = 4): every k in [1, 3], n in [1, 8] and m in [1, 4];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13459
// 4x8s4 SSE kernel (sr = 4): full 4x8 tile, every k in [5, 7].
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13474
// 4x8s4 SSE kernel (sr = 4): full 4x8 tile, every k in [5, 7], a_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13490
// 4x8s4 SSE kernel (sr = 4): every k in [5, 7], n in [1, 8] and m in [1, 4];
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13510
// 4x8s4 SSE kernel (sr = 4): full 4x8 tile, k in {8, 12, ..., 40}.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13525
// 4x8s4 SSE kernel (sr = 4): full 4x8 tile, k in {8, 12, ..., 40}, a_stride = 43.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13541
// 4x8s4 SSE kernel (sr = 4): k in {8, 12, ..., 40}, every n in [1, 8] and
// m in [1, 4]; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13561
// 4x8s4 SSE kernel (sr = 4): n in [9, 15], k in {1, 6, 11, 16}, m = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13578
// 4x8s4 SSE kernel (sr = 4): n in [9, 15], k in {1, 6, 11, 16}, m = 4,
// cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13596
// 4x8s4 SSE kernel (sr = 4): n in [9, 15], k in {1, 6, 11, 16}, m = 4,
// a_stride = 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13614
// 4x8s4 SSE kernel (sr = 4): n in [9, 15], k in {1, 6, 11, 16}, every m in
// [1, 4]; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13634
// 4x8s4 SSE kernel (sr = 4): n in {16, 24}, k in {1, 6, 11, 16}, m = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13651
// 4x8s4 SSE kernel (sr = 4): n in {16, 24}, k in {1, 6, 11, 16}, m = 4,
// cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13669
// 4x8s4 SSE kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 4, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13687
// 4x8s4 SSE kernel: n in {16, 24}, k in {1, 6, 11, 16}, every m in [1, 4]; iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13707
// 4x8s4 SSE kernel: k in {1, 6, 11, 16}, every n in [1, 8] and m in [1, 4],
// with cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13728
// 4x8s4 SSE kernel: single m = 4, n = 8, k = 4 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13742
// 4x8s4 SSE kernel: single m = 4, n = 8, k = 4 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13756
// 4x8s4 SSE kernel: single m = 4, n = 8, k = 4 case with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13770 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13771
13772
13773 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 5x8 sse_dup kernel: single m = 5, n = 8, k = 4 case.
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
13786
// 5x8 sse_dup kernel: single m = 5, n = 8, k = 4 case with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
13800
// 5x8 sse_dup kernel: single m = 5, n = 8, k = 4 case with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
13814
// 5x8 sse_dup kernel: k = 4 with every n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
13832
// 5x8 sse_dup kernel: k = 4, n = 8, every m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
13848
// 5x8 sse_dup kernel: k = 4, m = 5, every n in [1, 8]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
13864
// 5x8 sse_dup kernel: m = 5, n = 8, every k in [1, 3].
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
13879
// 5x8 sse_dup kernel: m = 5, n = 8, every k in [1, 3], with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
13895
// 5x8 sse_dup kernel: every k in [1, 3], n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13915
// 5x8 sse_dup kernel: m = 5, n = 8, every k in [5, 7].
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
13930
// 5x8 sse_dup kernel: m = 5, n = 8, every k in [5, 7], with a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
13946
// 5x8 sse_dup kernel: every k in [5, 7], n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13966
// 5x8 sse_dup kernel: m = 5, n = 8, k stepping by 4 from 8 through 40.
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
13981
// 5x8 sse_dup kernel: m = 5, n = 8, k stepping by 4 from 8 through 40, with a_stride(43).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
13997
// 5x8 sse_dup kernel: k stepping by 4 from 8 through 40, every n in [1, 8]
// and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14017
// 5x8 sse_dup kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 5.
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14034
// 5x8 sse_dup kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 5, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14052
// 5x8 sse_dup kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 5, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14070
// 5x8 sse_dup kernel: n in [9, 15], k in {1, 6, 11, 16}, every m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14090
// 5x8 sse_dup kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 5.
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14107
// 5x8 sse_dup kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 5, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14125
// 5x8 sse_dup kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 5, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14143
// 5x8 sse_dup kernel: n in {16, 24}, k in {1, 6, 11, 16}, every m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14163
// 5x8 sse_dup kernel: k in {1, 6, 11, 16}, every n in [1, 8] and m in [1, 5],
// with cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14184
// 5x8 sse_dup kernel: single m = 5, n = 8, k = 4 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
14198
// 5x8 sse_dup kernel: single m = 5, n = 8, k = 4 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
14212
// 5x8 sse_dup kernel: single m = 5, n = 8, k = 4 case with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
14226 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14227
14228
14229 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 5x8 sse_load1 kernel: single m = 5, n = 8, k = 1 case.
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
14242
// 5x8 sse_load1 kernel: single m = 5, n = 8, k = 1 case with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
14256
// 5x8 sse_load1 kernel: single m = 5, n = 8, k = 1 case with a_stride(3).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
14270
// 5x8 sse_load1 kernel: k = 1 with every n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
14288
// 5x8 sse_load1 kernel: k = 1, n = 8, every m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
14304
// 5x8 sse_load1 kernel: k = 1, m = 5, every n in [1, 8]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
14320
// 5x8 sse_load1 kernel: m = 5, n = 8, every k in [2, 9].
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
14335
// 5x8 sse_load1 kernel: m = 5, n = 8, every k in [2, 9], with a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
14351
// 5x8 sse_load1 kernel: every k in [2, 9], n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_gt_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14371
// 5x8 sse_load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 5.
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
14388
// 5x8 sse_load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 5, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
14406
// 5x8 sse_load1 kernel: n in [9, 15], k in {1, 3, 5}, m = 5, with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
14424
// 5x8 sse_load1 kernel: n in [9, 15], k in {1, 3, 5}, every m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14444
// 5x8 sse_load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 5.
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
14461
// 5x8 sse_load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 5, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
14479
// 5x8 sse_load1 kernel: n in {16, 24}, k in {1, 3, 5}, m = 5, with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
14497
// 5x8 sse_load1 kernel: n in {16, 24}, k in {1, 3, 5}, every m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14517
// 5x8 sse_load1 kernel: k in {1, 3, 5}, every n in [1, 8] and m in [1, 5],
// with cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14538
// 5x8 sse_load1 kernel: single m = 5, n = 8, k = 1 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
14552
// 5x8 sse_load1 kernel: single m = 5, n = 8, k = 1 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
14566
// 5x8 sse_load1 kernel: single m = 5, n = 8, k = 1 case with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
14580 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14581
14582
14583 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 5x8 sse2_dup kernel: single m = 5, n = 8, k = 4 case.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14596
// 5x8 sse2_dup kernel: single m = 5, n = 8, k = 4 case with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14610
// 5x8 sse2_dup kernel: single m = 5, n = 8, k = 4 case with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14624
// 5x8 sse2_dup kernel: k = 4 with every n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14642
// 5x8 sse2_dup kernel: k = 4, n = 8, every m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14658
// 5x8 sse2_dup kernel: k = 4, m = 5, every n in [1, 8]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14674
// 5x8 sse2_dup kernel: m = 5, n = 8, every k in [1, 3].
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14689
// 5x8 sse2_dup kernel: m = 5, n = 8, every k in [1, 3], with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14705
// 5x8 sse2_dup kernel: every k in [1, 3], n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14725
// 5x8 sse2_dup kernel: m = 5, n = 8, every k in [5, 7].
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14740
// 5x8 sse2_dup kernel: m = 5, n = 8, every k in [5, 7], with a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14756
// 5x8 sse2_dup kernel: every k in [5, 7], n in [1, 8] and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14776
// 5x8 sse2_dup kernel: m = 5, n = 8, k stepping by 4 from 8 through 40.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14791
// 5x8 sse2_dup kernel: m = 5, n = 8, k stepping by 4 from 8 through 40, with a_stride(43).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14807
// 5x8 sse2_dup kernel: k stepping by 4 from 8 through 40, every n in [1, 8]
// and m in [1, 5]; iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14827
// 5x8 sse2_dup kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 5.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14844
// n in 9..15 with m = 5, k in {1, 6, 11, 16}, and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14862
// n in 9..15 with m = 5, k in {1, 6, 11, 16}, and a_stride(23), which exceeds
// every k in the sweep.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14880
// n in 9..15, k in {1, 6, 11, 16}, and every m in 1..5; one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14900
// n in {16, 24} (multiples of nr = 8) with m = 5 and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14917
// n in {16, 24} with m = 5, k in {1, 6, 11, 16}, and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14935
// n in {16, 24} with m = 5, k in {1, 6, 11, 16}, and a_stride(23), which
// exceeds every k in the sweep.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14953
// n in {16, 24}, k in {1, 6, 11, 16}, and every m in 1..5; one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14973
// Every m in 1..5 and n in 1..8 for k in {1, 6, 11, 16}, with cm_stride(11);
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14994
// Full 5x8 tile at k = 4 with qmin(128).
// NOTE(review): presumably qmin raises the lower output clamp — confirm in
// GemmMicrokernelTester.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(5)
    .n(8)
    .k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
15008
// Full 5x8 tile at k = 4 with qmax(128).
// NOTE(review): presumably qmax lowers the upper output clamp — confirm in
// GemmMicrokernelTester.
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(5)
    .n(8)
    .k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
15022
// Full 5x8 tile at k = 4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(5)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
15036 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15037
15038
15039 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 5x8 tile (sr = 4) at k = 4.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(5)
    .n(8)
    .k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
15052
// Full 5x8 tile (sr = 4) at k = 4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(5)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
15066
// Full 5x8 tile (sr = 4) at k = 4 with a_stride(7), which exceeds k.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(5)
    .n(8)
    .k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
15080
// Every m in 1..5 and n in 1..8 at k = 4; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 5; m++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
15098
// Every m in 1..5 with n = 8 at k = 4; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m = 1; m <= 5; m++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
15114
// Every n in 1..8 with m = 5 at k = 4; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
15130
// Full 5x8 tile (sr = 4) for k in 1..3.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
15145
// Full 5x8 tile (sr = 4) for k in 1..3 with a_stride(7), which exceeds every k
// in the sweep.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
15161
// Every m in 1..5 and n in 1..8 for k in 1..3; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15181
// Full 5x8 tile (sr = 4) for k in 5..7.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
15196
// Full 5x8 tile (sr = 4) for k in 5..7 with a_stride(11), which exceeds every
// k in the sweep.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
15212
// Every m in 1..5 and n in 1..8 for k in 5..7; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15232
// Full 5x8 tile (sr = 4), with k swept over 8, 12, ..., 40 (step 4).
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
15247
// Full 5x8 tile (sr = 4), k swept over 8, 12, ..., 40, with a_stride(43),
// which exceeds every k in the sweep.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
15263
// Every m in 1..5 and n in 1..8 for k swept over 8, 12, ..., 40; one iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15283
// n in 9..15 (above nr = 8, below 2 * nr) with m = 5 and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
15300
// n in 9..15 with m = 5, k in {1, 6, 11, 16}, and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
15318
// n in 9..15 with m = 5, k in {1, 6, 11, 16}, and a_stride(23), which exceeds
// every k in the sweep.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
15336
// n in 9..15, k in {1, 6, 11, 16}, and every m in 1..5; one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15356
// n in {16, 24} (multiples of nr = 8) with m = 5 and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
15373
// n in {16, 24} with m = 5, k in {1, 6, 11, 16}, and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
15391
// n in {16, 24} with m = 5, k in {1, 6, 11, 16}, and a_stride(23), which
// exceeds every k in the sweep.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
15409
// n in {16, 24}, k in {1, 6, 11, 16}, and every m in 1..5; one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15429
// Every m in 1..5 and n in 1..8 for k in {1, 6, 11, 16}, with cm_stride(11);
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15450
// Full 5x8 tile (sr = 4) at k = 4 with qmin(128).
// NOTE(review): presumably qmin raises the lower output clamp — confirm in
// GemmMicrokernelTester.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(5)
    .n(8)
    .k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
15464
// Full 5x8 tile (sr = 4) at k = 4 with qmax(128).
// NOTE(review): presumably qmax lowers the upper output clamp — confirm in
// GemmMicrokernelTester.
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(5)
    .n(8)
    .k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
15478
// Full 5x8 tile (sr = 4) at k = 4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(5)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
}
15492 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15493
15494
15495 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x16 tile at k = 1.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15508
// Full 3x16 tile at k = 1 with cn_stride(19).
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(1)
    .cn_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15522
// Full 3x16 tile at k = 1 with a_stride(3), which exceeds k.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15536
// Every m in 1..3 and n in 1..16 at k = 1; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15554
// Every m in 1..3 with n = 16 at k = 1; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15570
// Every n in 1..16 with m = 3 at k = 1; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15586
// Full 3x16 tile for k in 2..9.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15601
// Full 3x16 tile for k in 2..9 with a_stride(11), which exceeds every k in the
// sweep.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15617
// Every m in 1..3 and n in 1..16 for k in 2..9; one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15637
// n in 17..31 (above nr = 16, below 2 * nr) with m = 3 and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15654
// n in 17..31 with m = 3, k in {1, 3, 5}, and cn_stride(19).
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15672
// n in 17..31 with m = 3, k in {1, 3, 5}, and a_stride(7), which exceeds every
// k in the sweep.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15690
// n in 17..31, k in {1, 3, 5}, and every m in 1..3; one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15710
// n in {32, 48} (multiples of nr = 16) with m = 3 and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15727
// n in {32, 48} with m = 3, k in {1, 3, 5}, and cn_stride(19).
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15745
// n in {32, 48} with m = 3, k in {1, 3, 5}, and a_stride(7), which exceeds
// every k in the sweep.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15763
// n in {32, 48}, k in {1, 3, 5}, and every m in 1..3; one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15783
// Every m in 1..3 and n in 1..16 for k in {1, 3, 5}, with cm_stride(19); one
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15804
// Full 3x16 tile at k = 1 with qmin(128).
// NOTE(review): presumably qmin raises the lower output clamp — confirm in
// GemmMicrokernelTester.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15818
// Full 3x16 tile at k = 1 with qmax(128).
// NOTE(review): presumably qmax lowers the upper output clamp — confirm in
// GemmMicrokernelTester.
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15832
// Full 3x16 tile at k = 1 with cm_stride(19).
TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(1)
    .cm_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15846 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15847
15848
15849 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x8 tile at k = 1.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15862
// Full 4x8 tile at k = 1 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15876
// Full 4x8 tile at k = 1 with a_stride(3), which exceeds k.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15890
// Every m in 1..4 and n in 1..8 at k = 1; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15908
// Every m in 1..4 with n = 8 at k = 1; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15924
// Every n in 1..8 with m = 4 at k = 1; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15940
// Full 4x8 tile for k in 2..9.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15955
// Full 4x8 tile for k in 2..9 with a_stride(11), which exceeds every k in the
// sweep.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15971
// Every m in 1..4 and n in 1..8 for k in 2..9; one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15991
// n in 9..15 (above nr = 8, below 2 * nr) with m = 4 and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16008
// n swept over 9..15 with k in {1, 3, 5}, m = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16026
// n swept over 9..15 with k in {1, 3, 5}, m = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16044
// n swept over 9..15, k in {1, 3, 5}, every m in 1..4, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16064
// n in {16, 24} (multiples of nr = 8) with k in {1, 3, 5} and m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16081
// n in {16, 24} with k in {1, 3, 5}, m = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16099
// n in {16, 24} with k in {1, 3, 5}, m = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16117
// n in {16, 24}, k in {1, 3, 5}, every m in 1..4, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16137
// Every m in 1..4 and n in 1..8 for k in {1, 3, 5}, with cm_stride set to 11
// and iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16158
// Full 4x8 tile, k = 1, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16172
// Full 4x8 tile, k = 1, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16186
// Full 4x8 tile, k = 1, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16200 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16201
16202
16203 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 7x8 tile with k = 1.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16216
// Full 7x8 tile, k = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16230
// Full 7x8 tile, k = 1, with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16244
// Every m in 1..7 and n in 1..8 at k = 1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16262
// m swept over 1..7 with n = 8 and k = 1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16278
// n swept over 1..8 with m = 7 and k = 1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16294
// Full 7x8 tile with k swept over 2..9.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16309
// Full 7x8 tile with k swept over 2..9 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16325
// Every m in 1..7 and n in 1..8 for k in 2..9, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16345
// n swept over 9..15 (above nr = 8) with k in {1, 3, 5} and m = 7.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16362
// n swept over 9..15 with k in {1, 3, 5}, m = 7 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16380
// n swept over 9..15 with k in {1, 3, 5}, m = 7 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16398
// n swept over 9..15, k in {1, 3, 5}, every m in 1..7, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16418
// n in {16, 24} (multiples of nr = 8) with k in {1, 3, 5} and m = 7.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16435
// n in {16, 24} with k in {1, 3, 5}, m = 7 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16453
// n in {16, 24} with k in {1, 3, 5}, m = 7 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16471
// n in {16, 24}, k in {1, 3, 5}, every m in 1..7, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16491
// Every m in 1..7 and n in 1..8 for k in {1, 3, 5}, with cm_stride set to 11
// and iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16512
// Full 7x8 tile, k = 1, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16526
// Full 7x8 tile, k = 1, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16540
// Full 7x8 tile, k = 1, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16554 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16555
16556
16557 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x8 tile with k = 1.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16570
// Full 1x8 tile, k = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16584
// Full 1x8 tile, k = 1, with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16598
// Every n in 1..8 at k = 1, with iterations(1) per shape; the m loop covers only m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16616
// m loop (only m = 1) with n = 8 and k = 1, with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16632
// n swept over 1..8 with m = 1 and k = 1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16648
// Full 1x8 tile with k swept over 2..9.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16663
// Full 1x8 tile with k swept over 2..9 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16679
// Every n in 1..8 for k in 2..9, with iterations(1) per shape; the m loop covers only m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16699
// n swept over 9..15 (above nr = 8) with k in {1, 3, 5} and m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16716
// n swept over 9..15 with k in {1, 3, 5}, m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16734
// n swept over 9..15 with k in {1, 3, 5}, m = 1 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16752
// n swept over 9..15 and k in {1, 3, 5}, with iterations(1) per shape;
// the m loop covers only m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16772
// n in {16, 24} (multiples of nr = 8) with k in {1, 3, 5} and m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16789
// n in {16, 24} with k in {1, 3, 5}, m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16807
// n in {16, 24} with k in {1, 3, 5}, m = 1 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16825
// n in {16, 24} and k in {1, 3, 5}, with iterations(1) per shape;
// the m loop covers only m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16845
// Every n in 1..8 for k in {1, 3, 5}, with cm_stride set to 11 and iterations(1)
// per shape; the m loop covers only m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16866
// Full 1x8 tile, k = 1, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16880
// Full 1x8 tile, k = 1, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16894
// Full 1x8 tile, k = 1, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16908 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16909
16910
16911 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x16 tile with k = 1.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16924
// Full 1x16 tile, k = 1, with cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .cn_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16938
// Full 1x16 tile, k = 1, with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
16952
// Every n in 1..16 at k = 1, with iterations(1) per shape; the m loop covers only m = 1.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16970
// m loop (only m = 1) with n = 16 and k = 1, with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
16986
// n swept over 1..16 with m = 1 and k = 1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17002
// Full 1x16 tile with k swept over 2..9.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17017
// Full 1x16 tile with k swept over 2..9 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17033
// Every n in 1..16 for k in 2..9, with iterations(1) per shape; the m loop covers only m = 1.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17053
// n swept over 17..31 (above nr = 16) with k in {1, 3, 5} and m = 1.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17070
// n swept over 17..31 with k in {1, 3, 5}, m = 1 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17088
// n swept over 17..31 with k in {1, 3, 5}, m = 1 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17106
// n swept over 17..31 and k in {1, 3, 5}, with iterations(1) per shape;
// the m loop covers only m = 1.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17126
// n in {32, 48} (multiples of nr = 16) with k in {1, 3, 5} and m = 1.
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17143
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 32 and 48 (step 16) while k takes 1, 3, 5; cn_stride is fixed at 19.
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17161
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 32 and 48 (step 16) while k takes 1, 3, 5; a_stride is fixed at 7.
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17179
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 32 and 48, k takes 1, 3, 5, and m covers 1..1; a single iteration per combination.
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17199
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // k takes 1, 3, 5 (outermost), n covers 1..16, m covers 1..1; cm_stride is fixed at 19.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17220
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Single 1x16 case with k = 1 and qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17234
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Single 1x16 case with k = 1 and qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17248
TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  // Single 1x16 case with k = 1 and cm_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17262 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17263
17264
17265 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  // Single 3x16 case with k = 1 and no extra options.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17278
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // Single 3x16 case with k = 1 and cn_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17292
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // Single 3x16 case with k = 1 and a_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17306
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n covers 1..16 (outer) and m covers 1..3 (inner) with k = 1; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17324
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  // m covers 1..3 with n = 16 and k = 1; one iteration each.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1)
        .m(m_dim).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
17340
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  // n covers 1..16 with m = 3 and k = 1; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
17356
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 on the full 3x16 tile.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
17371
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 on the full 3x16 tile with a_stride fixed at 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
17387
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 (outermost), n covers 1..16, m covers 1..3; one iteration each.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17407
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 17..31 and k takes 1, 3, 5; m stays at 3.
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17424
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 17..31 and k takes 1, 3, 5; m stays at 3 and cn_stride is fixed at 19.
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17442
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 17..31 and k takes 1, 3, 5; m stays at 3 and a_stride is fixed at 7.
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17460
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 17..31, k takes 1, 3, 5, and m covers 1..3; one iteration per combination.
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17480
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 32 and 48 (step 16) while k takes 1, 3, 5; m stays at 3.
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17497
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 32 and 48 (step 16) while k takes 1, 3, 5; cn_stride is fixed at 19.
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17515
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 32 and 48 (step 16) while k takes 1, 3, 5; a_stride is fixed at 7.
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17533
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 32 and 48, k takes 1, 3, 5, and m covers 1..3; one iteration per combination.
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17553
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // k takes 1, 3, 5 (outermost), n covers 1..16, m covers 1..3; cm_stride is fixed at 19.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17574
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Single 3x16 case with k = 1 and qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17588
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Single 3x16 case with k = 1 and qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17602
TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  // Single 3x16 case with k = 1 and cm_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17616 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17617
17618
17619 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  // Single 4x8 case with k = 1 and no extra options.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17632
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // Single 4x8 case with k = 1 and cn_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17646
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // Single 4x8 case with k = 1 and a_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17660
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n covers 1..8 (outer) and m covers 1..4 (inner) with k = 1; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17678
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  // m covers 1..4 with n = 8 and k = 1; one iteration each.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
17694
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  // n covers 1..8 with m = 4 and k = 1; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
17710
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 on the full 4x8 tile.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
17725
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 on the full 4x8 tile with a_stride fixed at 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
17741
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 (outermost), n covers 1..8, m covers 1..4; one iteration each.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17761
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 9..15 and k takes 1, 3, 5; m stays at 4.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17778
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 9..15 and k takes 1, 3, 5; m stays at 4 and cn_stride is fixed at 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17796
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 9..15 and k takes 1, 3, 5; m stays at 4 and a_stride is fixed at 7.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17814
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 9..15, k takes 1, 3, 5, and m covers 1..4; one iteration per combination.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17834
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 16 and 24 (step 8) while k takes 1, 3, 5; m stays at 4.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17851
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 16 and 24 (step 8) while k takes 1, 3, 5; cn_stride is fixed at 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17869
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 16 and 24 (step 8) while k takes 1, 3, 5; a_stride is fixed at 7.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
17887
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 16 and 24, k takes 1, 3, 5, and m covers 1..4; one iteration per combination.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17907
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // k takes 1, 3, 5 (outermost), n covers 1..8, m covers 1..4; cm_stride is fixed at 11.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17928
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Single 4x8 case with k = 1 and qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17942
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Single 4x8 case with k = 1 and qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17956
TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  // Single 4x8 case with k = 1 and cm_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17970 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17971
17972
17973 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  // Single 5x8 case with k = 1 and no extra options.
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
17986
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // Single 5x8 case with k = 1 and cn_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
18000
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // Single 5x8 case with k = 1 and a_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
}
18014
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n covers 1..8 (outer) and m covers 1..5 (inner) with k = 1; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
18032
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  // m covers 1..5 with n = 8 and k = 1; one iteration each.
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
18048
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  // n covers 1..8 with m = 5 and k = 1; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
18064
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 on the full 5x8 tile.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
18079
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 on the full 5x8 tile with a_stride fixed at 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
18095
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // k covers 2..9 (outermost), n covers 1..8, m covers 1..5; one iteration each.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18115
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 9..15 and k takes 1, 3, 5; m stays at 5.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
18132
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 9..15 and k takes 1, 3, 5; m stays at 5 and cn_stride is fixed at 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
18150
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 9..15 and k takes 1, 3, 5; m stays at 5 and a_stride is fixed at 7.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
18168
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n sweeps 9..15, k takes 1, 3, 5, and m covers 1..5; one iteration per combination.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18188
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 16 and 24 (step 8) while k takes 1, 3, 5; m stays at 5.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
18205
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 16 and 24 (step 8) while k takes 1, 3, 5; cn_stride is fixed at 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
18223
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 16 and 24 (step 8) while k takes 1, 3, 5; a_stride is fixed at 7.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
18241
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  // n takes 16 and 24, k takes 1, 3, 5, and m covers 1..5; one iteration per combination.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18261
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..5, with cm_stride set to 11
// and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18282
// Single run with m = 5, n = 8, k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18296
// Single run with m = 5, n = 8, k = 1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18310
// Single run with m = 5, n = 8, k = 1 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18324 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18325
18326
18327 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 16 and k = 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18340
// Single run with m = 1, n = 16, k = 1 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18354
// Single run with m = 1, n = 16, k = 1 and a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18368
// Sweeps n over 1..16 and m over 1..1 with k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18386
// Sweeps m over 1..1 with n = 16, k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    // A fresh, value-initialized tester is configured for every row count.
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(rows).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
18402
// Sweeps n over 1..16 with m = 1, k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    // A fresh, value-initialized tester is configured for every column count.
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
18418
// Sweeps k over 2..9 with m = 1 and n = 16.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    // A fresh, value-initialized tester is configured for every depth.
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
18433
// Sweeps k over 2..9 with m = 1, n = 16 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    // A fresh, value-initialized tester is configured for every depth.
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
18449
// Sweeps k over 2..9, n over 1..16 and m over 1..1, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
18469
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18486
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 1 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18504
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 1 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18522
// Sweeps n over 17..31, k over {1, 3, 5} and m over 1..1, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
18542
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18559
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 1 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18577
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 1 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18595
// Sweeps n over {32, 48}, k over {1, 3, 5} and m over 1..1, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
18615
// Sweeps k over {1, 3, 5}, n over 1..16 and m over 1..1, with cm_stride set to 19
// and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
18636
// Single run with m = 1, n = 16, k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18650
// Single run with m = 1, n = 16, k = 1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18664
// Single run with m = 1, n = 16, k = 1 and cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18678 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18679
18680
18681 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 6, n = 16 and k = 1.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18694
// Single run with m = 6, n = 16, k = 1 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18708
// Single run with m = 6, n = 16, k = 1 and a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
18722
// Sweeps n over 1..16 and m over 1..6 with k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18740
// Sweeps m over 1..6 with n = 16, k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    // A fresh, value-initialized tester is configured for every row count.
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(16).kr(1).sr(1)
        .m(rows).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
18756
// Sweeps n over 1..16 with m = 6, k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    // A fresh, value-initialized tester is configured for every column count.
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(16).kr(1).sr(1)
        .m(6).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
18772
// Sweeps k over 2..9 with m = 6 and n = 16.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    // A fresh, value-initialized tester is configured for every depth.
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
18787
// Sweeps k over 2..9 with m = 6, n = 16 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    // A fresh, value-initialized tester is configured for every depth.
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
18803
// Sweeps k over 2..9, n over 1..16 and m over 1..6, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
18823
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 6.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(16).kr(1).sr(1)
          .m(6).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18840
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 6 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(16).kr(1).sr(1)
          .m(6).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18858
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 6 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(16).kr(1).sr(1)
          .m(6).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18876
// Sweeps n over 17..31, k over {1, 3, 5} and m over 1..6, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
18896
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 6.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(16).kr(1).sr(1)
          .m(6).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18913
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 6 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(16).kr(1).sr(1)
          .m(6).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18931
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 6 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(16).kr(1).sr(1)
          .m(6).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
18949
// Sweeps n over {32, 48}, k over {1, 3, 5} and m over 1..6, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
18969
// Sweeps k over {1, 3, 5}, n over 1..16 and m over 1..6, with cm_stride set to 19
// and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
18990
// Single run with m = 6, n = 16, k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19004
// Single run with m = 6, n = 16, k = 1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19018
// Single run with m = 6, n = 16, k = 1 and cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19032 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19033
19034
19035 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 7, n = 16 and k = 1.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19048
// Single run with m = 7, n = 16, k = 1 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19062
// Single run with m = 7, n = 16, k = 1 and a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19076
// Sweeps n over 1..16 and m over 1..7 with k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 7; ++rows) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19094
// Sweeps m over 1..7 with n = 16, k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t rows = 1; rows <= 7; ++rows) {
    // A fresh, value-initialized tester is configured for every row count.
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(16).kr(1).sr(1)
        .m(rows).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
19110
// Sweeps n over 1..16 with m = 7, k = 1 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    // A fresh, value-initialized tester is configured for every column count.
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(16).kr(1).sr(1)
        .m(7).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
19126
// Sweeps k over 2..9 with m = 7 and n = 16.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    // A fresh, value-initialized tester is configured for every depth.
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(16).kr(1).sr(1)
        .m(7).n(16).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
19141
// Sweeps k over 2..9 with m = 7, n = 16 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    // A fresh, value-initialized tester is configured for every depth.
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(16).kr(1).sr(1)
        .m(7).n(16).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
19157
// Sweeps k over 2..9, n over 1..16 and m over 1..7, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 7; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
19177
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 7.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(16).kr(1).sr(1)
          .m(7).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19194
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 7 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(16).kr(1).sr(1)
          .m(7).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19212
// Sweeps n over 17..31 and k over {1, 3, 5} with m fixed at 7 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(16).kr(1).sr(1)
          .m(7).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19230
// Sweeps n over 17..31, k over {1, 3, 5} and m over 1..7, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 7; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
19250
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 7.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(16).kr(1).sr(1)
          .m(7).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19267
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 7 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(16).kr(1).sr(1)
          .m(7).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19285
// Sweeps n over {32, 48} and k over {1, 3, 5} with m fixed at 7 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh, value-initialized tester is configured for every combination.
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(16).kr(1).sr(1)
          .m(7).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19303
// Sweeps n over {32, 48}, k over {1, 3, 5} and m over 1..7, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 7; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
19323
// Sweeps k over {1, 3, 5}, n over 1..16 and m over 1..7, with cm_stride set to 19
// and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 7; ++rows) {
        // A fresh, value-initialized tester is configured for every combination.
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
19344
// Single run with m = 7, n = 16, k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester tester{};
  tester.mr(7).nr(16).kr(1).sr(1)
      .m(7).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19358
// 7x16 AVX512F broadcast kernel: full 7x16 tile, k = 1, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(7).nr(16).kr(1).sr(1);
  tester.m(7).n(16).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19372
// 7x16 AVX512F broadcast kernel: full 7x16 tile, k = 1, with cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(7).nr(16).kr(1).sr(1);
  tester.m(7).n(16).k(1);
  tester.cm_stride(19);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19386 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19387
19388
19389 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 8x16 AVX512F broadcast kernel: full 8x16 tile with k = 1.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(8).nr(16).kr(1).sr(1);
  tester.m(8).n(16).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19402
// 8x16 AVX512F broadcast kernel: full 8x16 tile, k = 1, with cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(8).nr(16).kr(1).sr(1);
  tester.m(8).n(16).k(1);
  tester.cn_stride(19);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19416
// 8x16 AVX512F broadcast kernel: full 8x16 tile, k = 1, with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(8).nr(16).kr(1).sr(1);
  tester.m(8).n(16).k(1);
  tester.a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19430
// 8x16 AVX512F broadcast kernel: k = 1, every n in [1, 16] and m in [1, 8],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(8).nr(16).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19448
// 8x16 AVX512F broadcast kernel: k = 1, n = 16, every m in [1, 8],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(8).nr(16).kr(1).sr(1);
    tester.m(m_dim).n(16).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
19464
// 8x16 AVX512F broadcast kernel: k = 1, m = 8, every n in [1, 16],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(8).nr(16).kr(1).sr(1);
    tester.m(8).n(n_dim).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
19480
// 8x16 AVX512F broadcast kernel: full 8x16 tile for every k in [2, 9].
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(8).nr(16).kr(1).sr(1);
    tester.m(8).n(16).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
19495
// 8x16 AVX512F broadcast kernel: full 8x16 tile for every k in [2, 9],
// with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(8).nr(16).kr(1).sr(1);
    tester.m(8).n(16).k(k_dim);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
19511
// 8x16 AVX512F broadcast kernel: every k in [2, 9], n in [1, 16] and m in [1, 8],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(8).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
19531
// 8x16 AVX512F broadcast kernel: m = 8, every n in [17, 31], k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(8).nr(16).kr(1).sr(1);
      tester.m(8).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19548
// 8x16 AVX512F broadcast kernel: m = 8, every n in [17, 31], k in {1, 3, 5},
// with cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(8).nr(16).kr(1).sr(1);
      tester.m(8).n(n_dim).k(k_dim);
      tester.cn_stride(19);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19566
// 8x16 AVX512F broadcast kernel: m = 8, every n in [17, 31], k in {1, 3, 5},
// with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(8).nr(16).kr(1).sr(1);
      tester.m(8).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19584
// 8x16 AVX512F broadcast kernel: every n in [17, 31], k in {1, 3, 5}, every m in [1, 8],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(8).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
19604
// 8x16 AVX512F broadcast kernel: m = 8, n in {32, 48}, k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(8).nr(16).kr(1).sr(1);
      tester.m(8).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19621
// 8x16 AVX512F broadcast kernel: m = 8, n in {32, 48}, k in {1, 3, 5},
// with cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(8).nr(16).kr(1).sr(1);
      tester.m(8).n(n_dim).k(k_dim);
      tester.cn_stride(19);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19639
// 8x16 AVX512F broadcast kernel: m = 8, n in {32, 48}, k in {1, 3, 5},
// with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(8).nr(16).kr(1).sr(1);
      tester.m(8).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
19657
// 8x16 AVX512F broadcast kernel: n in {32, 48}, k in {1, 3, 5}, every m in [1, 8],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(8).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
19677
// 8x16 AVX512F broadcast kernel: k in {1, 3, 5}, every n in [1, 16] and m in [1, 8],
// with cm_stride set to 19 and one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(8).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(19).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
19698
// 8x16 AVX512F broadcast kernel: full 8x16 tile, k = 1, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(8).nr(16).kr(1).sr(1);
  tester.m(8).n(16).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19712
// 8x16 AVX512F broadcast kernel: full 8x16 tile, k = 1, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(8).nr(16).kr(1).sr(1);
  tester.m(8).n(16).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19726
// 8x16 AVX512F broadcast kernel: full 8x16 tile, k = 1, with cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(8).nr(16).kr(1).sr(1);
  tester.m(8).n(16).k(1);
  tester.cm_stride(19);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
19740 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19741
19742
19743 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x8 WASMSIMD ARM loadsplat kernel: full 1x8 tile with k = 1.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
19755
// 1x8 WASMSIMD ARM loadsplat kernel: full 1x8 tile, k = 1, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(1);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
19768
// 1x8 WASMSIMD ARM loadsplat kernel: full 1x8 tile, k = 1, with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(1);
  tester.a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
19781
// 1x8 WASMSIMD ARM loadsplat kernel: k = 1, every n in [1, 8] and m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
19798
// 1x8 WASMSIMD ARM loadsplat kernel: k = 1, n = 8, every m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(m_dim).n(8).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
19813
// 1x8 WASMSIMD ARM loadsplat kernel: k = 1, m = 1, every n in [1, 8],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(n_dim).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
19828
// 1x8 WASMSIMD ARM loadsplat kernel: full 1x8 tile for every k in [2, 9].
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
19842
// 1x8 WASMSIMD ARM loadsplat kernel: full 1x8 tile for every k in [2, 9],
// with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
19857
// 1x8 WASMSIMD ARM loadsplat kernel: every k in [2, 9], n in [1, 8] and m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
19876
// 1x8 WASMSIMD ARM loadsplat kernel: m = 1, every n in [9, 15], k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
19892
// 1x8 WASMSIMD ARM loadsplat kernel: m = 1, every n in [9, 15], k in {1, 3, 5},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
19909
// 1x8 WASMSIMD ARM loadsplat kernel: m = 1, every n in [9, 15], k in {1, 3, 5},
// with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
19926
// 1x8 WASMSIMD ARM loadsplat kernel: every n in [9, 15], k in {1, 3, 5}, every m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
19945
// 1x8 WASMSIMD ARM loadsplat kernel: m = 1, n in {16, 24}, k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
19961
// 1x8 WASMSIMD ARM loadsplat kernel: m = 1, n in {16, 24}, k in {1, 3, 5},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
19978
// 1x8 WASMSIMD ARM loadsplat kernel: m = 1, n in {16, 24}, k in {1, 3, 5},
// with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
19995
// 1x8 WASMSIMD ARM loadsplat kernel: n in {16, 24}, k in {1, 3, 5}, every m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20014
// 1x8 WASMSIMD ARM loadsplat kernel: k in {1, 3, 5}, every n in [1, 8] and m in [1, 1],
// with cm_stride set to 11 and one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20034
// 1x8 WASMSIMD ARM loadsplat kernel: full 1x8 tile, k = 1, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
20047
// 1x8 WASMSIMD ARM loadsplat kernel: full 1x8 tile, k = 1, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
20060
// 1x8 WASMSIMD ARM loadsplat kernel: full 1x8 tile, k = 1, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(1);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
20073 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20074
20075
20076 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile with k = 4.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20088
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile, k = 4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20101
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile, k = 4, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20114
// 1x8 WASMSIMD ARM splat kernel: k = 4, every n in [1, 8] and m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20131
// 1x8 WASMSIMD ARM splat kernel: k = 4, n = 8, every m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(m_dim).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20146
// 1x8 WASMSIMD ARM splat kernel: k = 4, m = 1, every n in [1, 8],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(n_dim).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20161
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile for every k in [1, 3].
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20175
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile for every k in [1, 3],
// with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20190
// 1x8 WASMSIMD ARM splat kernel: every k in [1, 3], n in [1, 8] and m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20209
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile for every k in [5, 7].
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20223
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile for every k in [5, 7],
// with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20238
// 1x8 WASMSIMD ARM splat kernel: every k in [5, 7], n in [1, 8] and m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20257
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile for k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20271
// 1x8 WASMSIMD ARM splat kernel: full 1x8 tile for k = 8, 12, ..., 40,
// with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20286
// 1x8 WASMSIMD ARM splat kernel: k = 8, 12, ..., 40, every n in [1, 8] and m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20305
// 1x8 WASMSIMD ARM splat kernel: m = 1, every n in [9, 15], k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20321
// 1x8 WASMSIMD ARM splat kernel: m = 1, every n in [9, 15], k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20338
// 1x8 WASMSIMD ARM splat kernel: m = 1, every n in [9, 15], k in {1, 6, 11, 16},
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20355
// 1x8 WASMSIMD ARM splat kernel: every n in [9, 15], k in {1, 6, 11, 16}, every m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20374
// 1x8 WASMSIMD ARM splat kernel: m = 1, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20390
// 1x8 WASMSIMD ARM splat kernel: m = 1, n in {16, 24}, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20407
// 1x8 WASMSIMD ARM splat kernel: m = 1, n in {16, 24}, k in {1, 6, 11, 16},
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20424
// 1x8 WASMSIMD ARM splat kernel: n in {16, 24}, k in {1, 6, 11, 16}, every m in [1, 1],
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20443
// Sweeps k over 1, 6, 11, 16, n over 1..8 and m over the single value 1 with cm_stride(11), one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20463
// Single run at m=1, n=8, k=4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20476
// Single run at m=1, n=8, k=4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20489
// Single run at m=1, n=8, k=4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20502 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20503
20504
20505 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=1, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20517
// Single run at m=1, n=8, k=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20530
// Single run at m=1, n=8, k=4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20543
// Sweeps n over 1..8 and m over the single value 1 at k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20560
// Sweeps m over the single value 1 at n=8, k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(m_size).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20575
// Sweeps n over 1..8 at m=1, k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(n_size).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20590
// Sweeps k over 1..3 at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20604
// Sweeps k over 1..3 at m=1, n=8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20619
// Sweeps k over 1..3, n over 1..8 and m over the single value 1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20638
// Sweeps k over 5..7 at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20652
// Sweeps k over 5..7 at m=1, n=8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20667
// Sweeps k over 5..7, n over 1..8 and m over the single value 1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20686
// Sweeps k over 8..40 in steps of 4 at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20700
// Sweeps k over 8..40 in steps of 4 at m=1, n=8 with a_stride(43).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
20715
// Sweeps k over 8..40 (step 4), n over 1..8 and m over the single value 1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20734
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m fixed at 1.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20750
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m fixed at 1 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20767
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m fixed at 1 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20784
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over the single value 1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20803
// Sweeps n over 16 and 24 (step 8) and k over 1, 6, 11, 16 with m fixed at 1.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20819
// Sweeps n over 16 and 24 (step 8) and k over 1, 6, 11, 16 with m fixed at 1 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20836
// Sweeps n over 16 and 24 (step 8) and k over 1, 6, 11, 16 with m fixed at 1 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20853
// Sweeps n over 16 and 24, k over 1, 6, 11, 16 and m over the single value 1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20872
// Sweeps k over 1, 6, 11, 16, n over 1..8 and m over the single value 1 with cm_stride(11), one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
20892
// Single run at m=1, n=8, k=4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20905
// Single run at m=1, n=8, k=4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20918
// Single run at m=1, n=8, k=4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
20931 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20932
20933
20934 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=3, n=8, k=1.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
20946
// Single run at m=3, n=8, k=1 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
20959
// Single run at m=3, n=8, k=1 with a_stride(3).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
20972
// Sweeps n over 1..8 and m over 1..3 at k=1, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
20989
// Sweeps m over 1..3 at n=8, k=1, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(m_size).n(8).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21004
// Sweeps n over 1..8 at m=3, k=1, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(n_size).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21019
// Sweeps k over 2..9 at m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21033
// Sweeps k over 2..9 at m=3, n=8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21048
// Sweeps k over 2..9, n over 1..8 and m over 1..3, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21067
// Sweeps n over 9..15 and k over 1, 3, 5 with m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21083
// Sweeps n over 9..15 and k over 1, 3, 5 with m fixed at 3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21100
// Sweeps n over 9..15 and k over 1, 3, 5 with m fixed at 3 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21117
// Sweeps n over 9..15, k over 1, 3, 5 and m over 1..3, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21136
// Sweeps n over 16 and 24 (step 8) and k over 1, 3, 5 with m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21152
// Sweeps n over 16 and 24 (step 8) and k over 1, 3, 5 with m fixed at 3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21169
// Sweeps n over 16 and 24 (step 8) and k over 1, 3, 5 with m fixed at 3 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21186
// Sweeps n over 16 and 24, k over 1, 3, 5 and m over 1..3, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21205
// Sweeps k over 1, 3, 5, n over 1..8 and m over 1..3 with cm_stride(11), one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21225
// Single run at m=3, n=8, k=1 with qmin(128).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
21238
// Single run at m=3, n=8, k=1 with qmax(128).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
21251
// Single run at m=3, n=8, k=1 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
21264 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21265
21266
21267 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=3, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
21279
// Single run at m=3, n=8, k=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
21292
// Single run at m=3, n=8, k=4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
21305
// Sweeps n over 1..8 and m over 1..3 at k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(m_size).n(n_size).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21322
// Sweeps m over 1..3 at n=8, k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(m_size).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21337
// Sweeps n over 1..8 at m=3, k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(n_size).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21352
// Sweeps k over 1..3 at m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21366
// Sweeps k over 1..3 at m=3, n=8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_size);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21381
// Sweeps k over 1..3, n over 1..8 and m over 1..3, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21400
// Sweeps k over 5..7 at m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21414
// Sweeps k over 5..7 at m=3, n=8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_size);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21429
// Sweeps k over 5..7, n over 1..8 and m over 1..3, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21448
// Sweeps k over 8..40 in steps of 4 at m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21462
// Sweeps k over 8..40 in steps of 4 at m=3, n=8 with a_stride(43).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_size);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
21477
// Sweeps k over 8..40 (step 4), n over 1..8 and m over 1..3, one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21496
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21512
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m fixed at 3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21529
// n in [9, 15] crossed with k in {1, 6, 11, 16}; m fixed at 3, a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21546
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 3]; one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21565
// n in {16, 24} crossed with k in {1, 6, 11, 16}; m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21581
// n in {16, 24} crossed with k in {1, 6, 11, 16}; m fixed at 3, cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21598
// n in {16, 24} crossed with k in {1, 6, 11, 16}; m fixed at 3, a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21615
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 3]; one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21634
// k in {1, 6, 11, 16}, n in [1, 8], m in [1, 3] with cm_stride(11); one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21654
// Single configuration: m=3, n=8, k=4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
21667
// Single configuration: m=3, n=8, k=4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
21680
// Single configuration: m=3, n=8, k=4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
21693 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21694
21695
21696 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=3, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
21708
// Single configuration: m=3, n=8, k=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
21721
// Single configuration: m=3, n=8, k=4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
21734
// k=4 with every (n, m) for n in [1, 8] and m in [1, 3]; one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21751
// k=4, n=8 with m in [1, 3]; one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21766
// k=4, m=3 with n in [1, 8]; one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21781
// k in {1, 2, 3} with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21795
// k in {1, 2, 3} with m=3, n=8 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21810
// k in {1, 2, 3}, n in [1, 8], m in [1, 3]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21829
// k in {5, 6, 7} with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21843
// k in {5, 6, 7} with m=3, n=8 and a_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21858
// k in {5, 6, 7}, n in [1, 8], m in [1, 3]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21877
// k swept over 8, 12, ..., 40 (step 4) with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21891
// k swept over 8, 12, ..., 40 (step 4) with m=3, n=8 and a_stride(43).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21906
// For k in 8, 12, ..., 40: every (n, m) with n in [1, 8] and m in [1, 3],
// one iteration each.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21925
// n in [9, 15] crossed with k in {1, 6, 11, 16}; m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21941
// n in [9, 15] crossed with k in {1, 6, 11, 16}; m fixed at 3, cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21958
// n in [9, 15] crossed with k in {1, 6, 11, 16}; m fixed at 3, a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21975
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 3]; one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21994
// n in {16, 24} crossed with k in {1, 6, 11, 16}; m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22010
// n in {16, 24} crossed with k in {1, 6, 11, 16}; m fixed at 3, cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22027
// n in {16, 24} crossed with k in {1, 6, 11, 16}; m fixed at 3, a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22044
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 3]; one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22063
// k in {1, 6, 11, 16}, n in [1, 8], m in [1, 3] with cm_stride(11); one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22083
// Single configuration: m=3, n=8, k=4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
22096
// Single configuration: m=3, n=8, k=4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
22109
// Single configuration: m=3, n=8, k=4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
22122 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22123
22124
22125 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=4, n=8, k=1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22137
// Single configuration: m=4, n=8, k=1 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22150
// Single configuration: m=4, n=8, k=1 with a_stride(3).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22163
// k=1 with every (n, m) for n in [1, 8] and m in [1, 4]; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22180
// k=1, n=8 with m in [1, 4]; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22195
// k=1, m=4 with n in [1, 8]; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22210
// k in [2, 9] with m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22224
// k in [2, 9] with m=4, n=8 and a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22239
// k in [2, 9], n in [1, 8], m in [1, 4]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22258
// n in [9, 15] crossed with k in {1, 3, 5}; m fixed at 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22274
// n in [9, 15] crossed with k in {1, 3, 5}; m fixed at 4, cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22291
// n in [9, 15] crossed with k in {1, 3, 5}; m fixed at 4, a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22308
// n in [9, 15], k in {1, 3, 5}, m in [1, 4]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22327
// n in {16, 24} crossed with k in {1, 3, 5}; m fixed at 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22343
// n in {16, 24} crossed with k in {1, 3, 5}; m fixed at 4, cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22360
// n in {16, 24} crossed with k in {1, 3, 5}; m fixed at 4, a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22377
// n in {16, 24}, k in {1, 3, 5}, m in [1, 4]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22396
// k in {1, 3, 5}, n in [1, 8], m in [1, 4] with cm_stride(11); one iteration
// per combination.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22416
// Single configuration: m=4, n=8, k=1 with qmin(128).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22429
// Single configuration: m=4, n=8, k=1 with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22442
// Single configuration: m=4, n=8, k=1 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22455 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22456
22457
22458 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=5, n=8, k=1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22470
// Single configuration: m=5, n=8, k=1 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22483
// Single configuration: m=5, n=8, k=1 with a_stride(3).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22496
// k=1 with every (n, m) for n in [1, 8] and m in [1, 5]; one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22513
// k=1, n=8 with m in [1, 5]; one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22528
// k=1, m=5 with n in [1, 8]; one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22543
// k in [2, 9] with m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22557
// k in [2, 9] with m=5, n=8 and a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22572
// k in [2, 9], n in [1, 8], m in [1, 5]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22591
// n in [9, 15] crossed with k in {1, 3, 5}; m fixed at 5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22607
// n in [9, 15] crossed with k in {1, 3, 5}; m fixed at 5, cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22624
// m=5 with n in 9..15 and k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(ndim).k(kdim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22641
// n in 9..15, k in {1, 3, 5}, and every m in 1..5; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      for (uint32_t mdim = 1; mdim <= 5; ++mdim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22660
// m=5 with n in {16, 24} (multiples of nr=8) and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(ndim).k(kdim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22676
// m=5 with n in {16, 24} and k in {1, 3, 5}; cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(ndim).k(kdim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22693
// m=5 with n in {16, 24} and k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(ndim).k(kdim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22710
// n in {16, 24}, k in {1, 3, 5}, and every m in 1..5; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      for (uint32_t mdim = 1; mdim <= 5; ++mdim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22729
// k in {1, 3, 5} with every n in 1..8 and m in 1..5; cm_stride set to 11, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t kdim = 1; kdim <= 5; kdim += 2) {
    for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 5; ++mdim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22749
// Full 5x8 tile at k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22762
// Full 5x8 tile at k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22775
// Full 5x8 tile at k=1 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22788 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22789
22790
22791 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 6x8 tile (m=6, n=8) with k=1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22803
// Full 6x8 tile at k=1 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22816
// Full 6x8 tile at k=1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
22829
// Every (m, n) with n in 1..8 and m in 1..6 at k=1; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
    for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mdim).n(ndim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22846
// m swept over 1..6 with n=8 and k=1; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mdim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22861
// n swept over 1..8 with m=6 and k=1; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(ndim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22876
// Full 6x8 tile with k swept over 2..9.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t kdim = 2; kdim < 10; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22890
// Full 6x8 tile with k swept over 2..9 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kdim = 2; kdim < 10; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22905
// Every (m, n) with m in 1..6 and n in 1..8, for k in 2..9; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kdim = 2; kdim < 10; ++kdim) {
    for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22924
// m=6 with n in 9..15 (above nr=8) and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22940
// m=6 with n in 9..15 and k in {1, 3, 5}; cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22957
// m=6 with n in 9..15 and k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22974
// n in 9..15, k in {1, 3, 5}, and every m in 1..6; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22993
// m=6 with n in {16, 24} (multiples of nr=8) and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23009
// m=6 with n in {16, 24} and k in {1, 3, 5}; cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23026
// m=6 with n in {16, 24} and k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23043
// n in {16, 24}, k in {1, 3, 5}, and every m in 1..6; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 5; kdim += 2) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23062
// k in {1, 3, 5} with every n in 1..8 and m in 1..6; cm_stride set to 11, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t kdim = 1; kdim <= 5; kdim += 2) {
    for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23082
// Full 6x8 tile at k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23095
// Full 6x8 tile at k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23108
// Full 6x8 tile at k=1 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23121 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23122
23123
23124 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 6x8 tile (m=6, n=8) with k=4.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23136
// Full 6x8 tile at k=4 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23149
// Full 6x8 tile at k=4 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23162
// Every (m, n) with n in 1..8 and m in 1..6 at k=4; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
    for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mdim).n(ndim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23179
// m swept over 1..6 with n=8 and k=4; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mdim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23194
// n swept over 1..8 with m=6 and k=4; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(ndim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23209
// Full 6x8 tile with k swept over 1..3.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t kdim = 1; kdim < 4; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23223
// Full 6x8 tile with k swept over 1..3 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
  for (size_t kdim = 1; kdim < 4; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23238
// Every (m, n) with m in 1..6 and n in 1..8, for k in 1..3; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t kdim = 1; kdim < 4; ++kdim) {
    for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23257
// Full 6x8 tile with k swept over 5..7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t kdim = 5; kdim < 8; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23271
// Full 6x8 tile with k swept over 5..7 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
  for (size_t kdim = 5; kdim < 8; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23286
// Every (m, n) with m in 1..6 and n in 1..8, for k in 5..7; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t kdim = 5; kdim < 8; ++kdim) {
    for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23305
// Full 6x8 tile with k taking the multiples of 4 from 8 through 40.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t kdim = 8; kdim <= 40; kdim += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23319
// Full 6x8 tile with k taking the multiples of 4 from 8 through 40; a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
  for (size_t kdim = 8; kdim <= 40; kdim += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23334
// Every (m, n) with m in 1..6 and n in 1..8, for k in {8, 12, ..., 40}; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t kdim = 8; kdim <= 40; kdim += 4) {
    for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23353
// m=6 with n in 9..15 (above nr=8) and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 20; kdim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23369
// m=6 with n in 9..15 and k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 20; kdim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23386
// m=6 with n in 9..15 and k in {1, 6, 11, 16}; a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 20; kdim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23403
// n in 9..15, k in {1, 6, 11, 16}, and every m in 1..6; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t ndim = 9; ndim < 16; ++ndim) {
    for (size_t kdim = 1; kdim <= 20; kdim += 5) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23422
// m=6 with n in {16, 24} (multiples of nr=8) and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 20; kdim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23438
// m=6 with n in {16, 24} and k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 20; kdim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23455
// m=6 with n in {16, 24} and k in {1, 6, 11, 16}; a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 20; kdim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(ndim).k(kdim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23472
// n in {16, 24}, k in {1, 6, 11, 16}, and every m in 1..6; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t ndim = 16; ndim <= 24; ndim += 8) {
    for (size_t kdim = 1; kdim <= 20; kdim += 5) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23491
// k in {1, 6, 11, 16} with every n in 1..8 and m in 1..6; cm_stride set to 11, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t kdim = 1; kdim <= 20; kdim += 5) {
    for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23511
// Full 6x8 tile at k=4 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23524
// Full 6x8 tile at k=4 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23537
// Full 6x8 tile at k=4 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23550 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23551
23552
23553 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 6x8 tile (m=6, n=8) with k=4.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23565
// Full 6x8 tile at k=4 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23578
// Full 6x8 tile at k=4 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23591
// Every (m, n) with n in 1..8 and m in 1..6 at k=4; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
    for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mdim).n(ndim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23608
// m swept over 1..6 with n=8 and k=4; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mdim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23623
// n swept over 1..8 with m=6 and k=4; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(ndim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23638
// Full 6x8 tile with k swept over 1..3.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t kdim = 1; kdim < 4; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23652
// Full 6x8 tile with k swept over 1..3 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
  for (size_t kdim = 1; kdim < 4; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23667
// Every (m, n) with m in 1..6 and n in 1..8, for k in 1..3; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t kdim = 1; kdim < 4; ++kdim) {
    for (uint32_t ndim = 1; ndim <= 8; ++ndim) {
      for (uint32_t mdim = 1; mdim <= 6; ++mdim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mdim).n(ndim).k(kdim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23686
// Full 6x8 tile with k swept over 5..7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t kdim = 5; kdim < 8; ++kdim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kdim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23700
// Full 6x8 shape, k in 5..7, with a_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23715
// Every (k, n, m) with k in 5..7, n in 1..8, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23734
// Full 6x8 shape with k stepping 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23748
// Full 6x8 shape, k stepping 8, 12, ..., 40, with a_stride(43).
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_val)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
23763
// Every (k, n, m) with k in 8..40 step 4, n in 1..8, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23782
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23798
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23815
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23832
// Every (n, k, m) with n in 9..15, k in {1, 6, 11, 16}, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23851
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23867
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23884
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23901
// Every (n, k, m) with n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23920
// Every (k, n, m) with k in {1, 6, 11, 16}, n in 1..8, m in 1..6, using
// cm_stride(11); one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23940
// Single run at m=6, n=8, k=4 with qmin(128) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23953
// Single run at m=6, n=8, k=4 with qmax(128) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23966
// Single run at m=6, n=8, k=4 with cm_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
23979 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23980
23981
23982 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at the full 6x8 shape with k=4.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
23994
// Single run at m=6, n=8, k=4 with cn_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
24007
// Single run at m=6, n=8, k=4 with a_stride(7) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
24020
// k fixed at 4; every (n, m) with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24037
// k fixed at 4, n fixed at 8; m walks 1..6, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24052
// k fixed at 4, m fixed at 6; n walks 1..8, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24067
// Full 6x8 shape with k taking each value in 1..3.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24081
// Full 6x8 shape, k in 1..3, with a_stride(7) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24096
// Every (k, n, m) with k in 1..3, n in 1..8, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24115
// Full 6x8 shape with k taking each value in 5..7.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24129
// Full 6x8 shape, k in 5..7, with a_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24144
// Every (k, n, m) with k in 5..7, n in 1..8, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24163
// Full 6x8 shape with k stepping 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24177
// Full 6x8 shape, k stepping 8, 12, ..., 40, with a_stride(43).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24192
// Every (k, n, m) with k in 8..40 step 4, n in 1..8, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24211
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24227
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24244
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24261
// Every (n, k, m) with n in 9..15, k in {1, 6, 11, 16}, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24280
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24296
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24313
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24330
// Every (n, k, m) with n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24349
// Every (k, n, m) with k in {1, 6, 11, 16}, n in 1..8, m in 1..6, using
// cm_stride(11); one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24369
// Single run at m=6, n=8, k=4 with qmin(128) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
24382
// Single run at m=6, n=8, k=4 with qmax(128) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
24395
// Single run at m=6, n=8, k=4 with cm_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
24408 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24409
24410
24411 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at the full 6x8 shape with k=4.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
24423
// Single run at m=6, n=8, k=4 with cn_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
24436
// Single run at m=6, n=8, k=4 with a_stride(7) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
24449
// k fixed at 4; every (n, m) with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24466
// k fixed at 4, n fixed at 8; m walks 1..6, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24481
// k fixed at 4, m fixed at 6; n walks 1..8, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24496
// Full 6x8 shape with k taking each value in 1..3.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24510
// Full 6x8 shape, k in 1..3, with a_stride(7) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24525
// Every (k, n, m) with k in 1..3, n in 1..8, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24544
// Full 6x8 shape with k taking each value in 5..7.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24558
// Full 6x8 shape, k in 5..7, with a_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24573
// Every (k, n, m) with k in 5..7, n in 1..8, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24592
// Full 6x8 shape with k stepping 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24606
// Full 6x8 shape, k stepping 8, 12, ..., 40, with a_stride(43).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_div_4_strided_a) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(k_val)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
24621
// Every (k, n, m) with k in 8..40 step 4, n in 1..8, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24640
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24656
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24673
// m fixed at 6; n in 9..15 crossed with k in {1, 6, 11, 16}, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24690
// Every (n, k, m) with n in 9..15, k in {1, 6, 11, 16}, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24709
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24725
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24742
// m fixed at 6; n in {16, 24} crossed with k in {1, 6, 11, 16}, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(6).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24759
// Every (n, k, m) with n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24778
// Every (k, n, m) with k in {1, 6, 11, 16}, n in 1..8, m in 1..6, using
// cm_stride(11); one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24798
// Single run at m=6, n=8, k=4 with qmin(128) set on the tester.
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
24811
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, qmax) {
  // Single 6x8 case at k == 4 with qmax(128).
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
24824
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, strided_cm) {
  // Single 6x8 case at k == 4 with cm_stride(11).
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
24837 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24838
24839
24840 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  // Single 1x8 case at k == 1.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24852
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  // Single 1x8 case at k == 1 with cn_stride(11).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24865
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_strided_a) {
  // Single 1x8 case at k == 1 with a_stride(3).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24878
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  // k == 1; n in 1..8 and m in 1..1 (one value); iterations(1) per case.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24895
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  // k == 1, n == 8; m in 1..1 (one value); iterations(1).
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24910
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  // k == 1, m == 1; n in 1..8; iterations(1).
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24925
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  // Full 1x8 case for every k in 2..9.
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24939
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_strided_a) {
  // Full 1x8 case for every k in 2..9 with a_stride(11).
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24954
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  // k in 2..9, n in 1..8, m in 1..1 (one value); iterations(1) per case.
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24973
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  // n in 9..15, k in {1, 3, 5}; m == 1.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24989
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  // n in 9..15, k in {1, 3, 5}; m == 1 with cn_stride(11).
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25006
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_a) {
  // n in 9..15, k in {1, 3, 5}; m == 1 with a_stride(7).
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25023
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  // n in 9..15, k in {1, 3, 5}, m in 1..1 (one value); iterations(1) per case.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25042
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  // n in {16, 24}, k in {1, 3, 5}; m == 1.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25058
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  // n in {16, 24}, k in {1, 3, 5}; m == 1 with cn_stride(11).
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25075
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_a) {
  // n in {16, 24}, k in {1, 3, 5}; m == 1 with a_stride(7).
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25092
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 3, 5}, m in 1..1 (one value); iterations(1) per case.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25111
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5}, n in 1..8, m in 1..1 (one value); cm_stride(11), iterations(1).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25131
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  // Single 1x8 case at k == 1 with qmin(128).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25144
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  // Single 1x8 case at k == 1 with qmax(128).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25157
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  // Single 1x8 case at k == 1 with cm_stride(11).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25170 #endif // XNN_ARCH_WASMRELAXEDSIMD
25171
25172
25173 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  // Single 3x8 case at k == 1.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25185
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  // Single 3x8 case at k == 1 with cn_stride(11).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25198
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_strided_a) {
  // Single 3x8 case at k == 1 with a_stride(3).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25211
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  // k == 1; n in 1..8 and m in 1..3; iterations(1) per case.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25228
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  // k == 1, n == 8; m in 1..3; iterations(1).
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25243
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  // k == 1, m == 3; n in 1..8; iterations(1).
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25258
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  // Full 3x8 case for every k in 2..9.
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25272
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_strided_a) {
  // Full 3x8 case for every k in 2..9 with a_stride(11).
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25287
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  // k in 2..9, n in 1..8, m in 1..3; iterations(1) per case.
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25306
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  // n in 9..15, k in {1, 3, 5}; m == 3.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25322
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  // n in 9..15, k in {1, 3, 5}; m == 3 with cn_stride(11).
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25339
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_a) {
  // n in 9..15, k in {1, 3, 5}; m == 3 with a_stride(7).
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25356
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  // n in 9..15, k in {1, 3, 5}, m in 1..3; iterations(1) per case.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25375
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  // n in {16, 24}, k in {1, 3, 5}; m == 3.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25391
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  // n in {16, 24}, k in {1, 3, 5}; m == 3 with cn_stride(11).
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25408
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_a) {
  // n in {16, 24}, k in {1, 3, 5}; m == 3 with a_stride(7).
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25425
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 3, 5}, m in 1..3; iterations(1) per case.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25444
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5}, n in 1..8, m in 1..3; cm_stride(11), iterations(1).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25464
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  // Single 3x8 case at k == 1 with qmin(128).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25477
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  // Single 3x8 case at k == 1 with qmax(128).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25490
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  // Single 3x8 case at k == 1 with cm_stride(11).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25503 #endif // XNN_ARCH_WASMRELAXEDSIMD
25504
25505
25506 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  // Single 3x8 case at k == 4.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
25518
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  // Single 3x8 case at k == 4 with cn_stride(11).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
25531
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_strided_a) {
  // Single 3x8 case at k == 4 with a_stride(7).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
25544
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  // k == 4; n in 1..8 and m in 1..3; iterations(1) per case.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25561
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  // k == 4, n == 8; m in 1..3; iterations(1).
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25576
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  // k == 4, m == 3; n in 1..8; iterations(1).
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25591
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  // Full 3x8 case for every k in 1..3.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25605
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_strided_a) {
  // Full 3x8 case for every k in 1..3 with a_stride(7).
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25620
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  // k in 1..3, n in 1..8, m in 1..3; iterations(1) per case.
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25639
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  // Full 3x8 case for every k in 5..7.
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25653
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_strided_a) {
  // Full 3x8 case for every k in 5..7 with a_stride(11).
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25668
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  // k in 5..7, n in 1..8, m in 1..3; iterations(1) per case.
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25687
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  // Full 3x8 case for k = 8, 12, ..., 40 (step 4).
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25701
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_div_4_strided_a) {
  // Full 3x8 case for k = 8, 12, ..., 40 (step 4) with a_stride(43).
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25716
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  // k = 8, 12, ..., 40 (step 4), n in 1..8, m in 1..3; iterations(1) per case.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25735
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  // n in 9..15, k in {1, 6, 11, 16}; m == 3.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25751
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  // n in 9..15, k in {1, 6, 11, 16}; m == 3 with cn_stride(11).
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25768
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_a) {
  // n in 9..15, k in {1, 6, 11, 16}; m == 3 with a_stride(23).
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25785
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  // n in 9..15, k in {1, 6, 11, 16}, m in 1..3; iterations(1) per case.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25804
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  // n in {16, 24}, k in {1, 6, 11, 16}; m == 3.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25820
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  // n in {16, 24}, k in {1, 6, 11, 16}; m == 3 with cn_stride(11).
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25837
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_a) {
  // n in {16, 24}, k in {1, 6, 11, 16}; m == 3 with a_stride(23).
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25854
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m in 1..3; iterations(1) per case.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25873
// Sweeps k in {1, 6, 11, 16}, n from 1 to 8 and m from 1 to 3 with cm_stride
// set to 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25893
// Single run with m = 3, n = 8 (matching mr/nr), k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
25906
// Single run with m = 3, n = 8 (matching mr/nr), k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
25919
// Single run with m = 3, n = 8 (matching mr/nr), k = 4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
25932 #endif // XNN_ARCH_WASMRELAXEDSIMD
25933
25934
25935 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 3x8s4 FMA kernel with m = 3, n = 8 (matching mr/nr) and
// k = 4.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
25947
// Single run with m = 3, n = 8, k = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
25960
// Single run with m = 3, n = 8, k = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
25973
// Sweeps every n from 1 to 8 and every m from 1 to 3 at k = 4, with
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(cur_m).n(cur_n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25990
// Sweeps m from 1 to 3 with n = 8 and k = 4, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(cur_m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26005
// Sweeps n from 1 to 8 with m = 3 and k = 4, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(cur_n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26020
// Sweeps k over 1, 2, 3 with m = 3 and n = 8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t cur_k = 1; cur_k <= 3; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26034
// Sweeps k over 1, 2, 3 with m = 3, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t cur_k = 1; cur_k <= 3; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(cur_k);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26049
// Sweeps k over 1, 2, 3, n from 1 to 8 and m from 1 to 3, with iterations
// set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k <= 3; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26068
// Sweeps k over 5, 6, 7 with m = 3 and n = 8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t cur_k = 5; cur_k <= 7; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26082
// Sweeps k over 5, 6, 7 with m = 3, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t cur_k = 5; cur_k <= 7; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(cur_k);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26097
// Sweeps k over 5, 6, 7, n from 1 to 8 and m from 1 to 3, with iterations
// set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k <= 7; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26116
// Sweeps k over the multiples of 4 from 8 to 40 with m = 3 and n = 8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26130
// Sweeps k over the multiples of 4 from 8 to 40 with m = 3, n = 8 and
// a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(cur_k);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26145
// Sweeps k over the multiples of 4 from 8 to 40, n from 1 to 8 and m from
// 1 to 3, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26164
// Sweeps n from 9 to 15 and k in {1, 6, 11, 16} with m = 3.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26180
// Sweeps n from 9 to 15 and k in {1, 6, 11, 16} with m = 3 and cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26197
// Sweeps n from 9 to 15 and k in {1, 6, 11, 16} with m = 3 and a_stride
// set to 23.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(cur_n).k(cur_k);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26214
// Sweeps n from 9 to 15, k in {1, 6, 11, 16} and m from 1 to 3, with
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26233
// Sweeps n in {16, 24} (multiples of the nr value 8) and k in {1, 6, 11, 16}
// with m = 3.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26249
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m = 3 and cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26266
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m = 3 and a_stride
// set to 23.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(cur_n).k(cur_k);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26283
// Sweeps n in {16, 24}, k in {1, 6, 11, 16} and m from 1 to 3, with
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26302
// Sweeps k in {1, 6, 11, 16}, n from 1 to 8 and m from 1 to 3 with cm_stride
// set to 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26322
// Single run with m = 3, n = 8, k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
26335
// Single run with m = 3, n = 8, k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
26348
// Single run with m = 3, n = 8, k = 4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
26361 #endif // XNN_ARCH_WASMRELAXEDSIMD
26362
26363
26364 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 4x8 loadsplat kernel with m = 4, n = 8 (matching mr/nr)
// and k = 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26376
// Single run with m = 4, n = 8, k = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26389
// Single run with m = 4, n = 8, k = 1 and a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26402
// Sweeps every n from 1 to 8 and every m from 1 to 4 at k = 1, with
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26419
// Sweeps m from 1 to 4 with n = 8 and k = 1, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(cur_m).n(8).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26434
// Sweeps n from 1 to 8 with m = 4 and k = 1, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(cur_n).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26449
// Sweeps k from 2 to 9 with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  for (size_t cur_k = 2; cur_k <= 9; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26463
// Sweeps k from 2 to 9 with m = 4, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t cur_k = 2; cur_k <= 9; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26478
// Sweeps k from 2 to 9, n from 1 to 8 and m from 1 to 4, with iterations
// set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k <= 9; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26497
// Sweeps n from 9 to 15 and k in {1, 3, 5} with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26513
// Sweeps n from 9 to 15 and k in {1, 3, 5} with m = 4 and cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26530
// Sweeps n from 9 to 15 and k in {1, 3, 5} with m = 4 and a_stride set
// to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26547
// Sweeps n from 9 to 15, k in {1, 3, 5} and m from 1 to 4, with iterations
// set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26566
// Sweeps n in {16, 24} (multiples of the nr value 8) and k in {1, 3, 5}
// with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26582
// Sweeps n in {16, 24} and k in {1, 3, 5} with m = 4 and cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26599
// Sweeps n in {16, 24} and k in {1, 3, 5} with m = 4 and a_stride set
// to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26616
// Sweeps n in {16, 24}, k in {1, 3, 5} and m from 1 to 4, with iterations
// set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26635
// Sweeps k in {1, 3, 5}, n from 1 to 8 and m from 1 to 4 with cm_stride
// set to 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26655
// Single run with m = 4, n = 8, k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26668
// Single run with m = 4, n = 8, k = 1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26681
// Single run with m = 4, n = 8, k = 1 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26694 #endif // XNN_ARCH_WASMRELAXEDSIMD
26695
26696
26697 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 4x8 splat kernel with m = 4, n = 8 (matching mr/nr)
// and k = 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26709
// Single run with m = 4, n = 8, k = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26722
// Single run with m = 4, n = 8, k = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26735
// Sweeps every n from 1 to 8 and every m from 1 to 4 at k = 4, with
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26752
// Sweeps m from 1 to 4 with n = 8 and k = 4, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(cur_m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26767
// Sweeps n from 1 to 8 with m = 4 and k = 4, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(cur_n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26782
// Sweeps k over 1, 2, 3 with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  for (size_t cur_k = 1; cur_k <= 3; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26796
// Sweeps k over 1, 2, 3 with m = 4, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t cur_k = 1; cur_k <= 3; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26811
// Sweeps k over 1, 2, 3, n from 1 to 8 and m from 1 to 4, with iterations
// set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k <= 3; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26830
// Sweeps k over 5, 6, 7 with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  for (size_t cur_k = 5; cur_k <= 7; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26844
// Sweeps k over 5, 6, 7 with m = 4, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t cur_k = 5; cur_k <= 7; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26859
// Sweeps k over 5, 6, 7, n from 1 to 8 and m from 1 to 4, with iterations
// set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k <= 7; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26878
// Sweeps k over the multiples of 4 from 8 to 40 with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26892
// Sweeps k over the multiples of 4 from 8 to 40 with m = 4, n = 8 and
// a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(cur_k);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26907
// Sweeps k over the multiples of 4 from 8 to 40, n from 1 to 8 and m from
// 1 to 4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26926
// n = 9..15 (above nr = 8) with m = 4 and k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26942
// n = 9..15 with m = 4, k = 1, 6, 11, 16 and cn_stride fixed at 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26959
// n = 9..15 with m = 4, k = 1, 6, 11, 16 and a_stride fixed at 23.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26976
// n = 9..15, k = 1, 6, 11, 16 and every m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26995
// n = 16 and 24 (multiples of 8) with m = 4 and k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27011
// n = 16 and 24 with m = 4, k = 1, 6, 11, 16 and cn_stride fixed at 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27028
// n = 16 and 24 with m = 4, k = 1, 6, 11, 16 and a_stride fixed at 23.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27045
// n = 16 and 24, k = 1, 6, 11, 16 and every m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27064
// k = 1, 6, 11, 16 against every m in [1, 4] and n in [1, 8], with cm_stride
// fixed at 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27084
// Single 4x8 tile at k = 4 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27097
// Single 4x8 tile at k = 4 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27110
// Single 4x8 tile at k = 4 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27123 #endif // XNN_ARCH_WASMRELAXEDSIMD
27124
27125
27126 #if XNN_ARCH_WASMRELAXEDSIMD
// Single full 4x8 tile (sr = 4) at k = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
27138
// Single full 4x8 tile (sr = 4) at k = 4 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
27151
// Single full 4x8 tile (sr = 4) at k = 4 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
27164
// k = 4 against every m in [1, 4] and n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27181
// k = 4, n = 8 and every m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27196
// k = 4, m = 4 and every n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27211
// Full 4x8 tile (sr = 4) with k = 1..3.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27225
// Full 4x8 tile (sr = 4) with k = 1..3 and a_stride fixed at 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27240
// k = 1..3 against every m in [1, 4] and n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27259
// Full 4x8 tile (sr = 4) with k = 5..7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27273
// Full 4x8 tile (sr = 4) with k = 5..7 and a_stride fixed at 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27288
// k = 5..7 against every m in [1, 4] and n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27307
// Full 4x8 tile (sr = 4) with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27321
// Full 4x8 tile (sr = 4) with k = 8, 12, ..., 40 and a_stride fixed at 43.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27336
// k = 8, 12, ..., 40 against every m in [1, 4] and n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27355
// n = 9..15 (above nr = 8) with m = 4 and k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27371
// n = 9..15 with m = 4, k = 1, 6, 11, 16 and cn_stride fixed at 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27388
// n = 9..15 with m = 4, k = 1, 6, 11, 16 and a_stride fixed at 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27405
// n = 9..15, k = 1, 6, 11, 16 and every m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27424
// n = 16 and 24 (multiples of 8) with m = 4 and k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27440
// n = 16 and 24 with m = 4, k = 1, 6, 11, 16 and cn_stride fixed at 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27457
// n = 16 and 24 with m = 4, k = 1, 6, 11, 16 and a_stride fixed at 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27474
// n = 16 and 24, k = 1, 6, 11, 16 and every m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27493
// k = 1, 6, 11, 16 against every m in [1, 4] and n in [1, 8], with cm_stride
// fixed at 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27513
// Single 4x8 tile (sr = 4) at k = 4 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
27526
// Single 4x8 tile (sr = 4) at k = 4 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
27539
// Single 4x8 tile (sr = 4) at k = 4 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
27552 #endif // XNN_ARCH_WASMRELAXEDSIMD
27553
27554
27555 #if XNN_ARCH_WASMRELAXEDSIMD
// Single full 5x8 tile at k = 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27567
// Single full 5x8 tile at k = 1 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27580
// Single full 5x8 tile at k = 1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27593
// k = 1 against every m in [1, 5] and n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27610
// k = 1, n = 8 and every m in [1, 5]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27625
// k = 1, m = 5 and every n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27640
// Full 5x8 tile with k = 2..9.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27654
// Full 5x8 tile with k = 2..9 and a_stride fixed at 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27669
// k = 2..9 against every m in [1, 5] and n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27688
// n = 9..15 (above nr = 8) with m = 5 and k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27704
// n = 9..15 with m = 5, k = 1, 3, 5 and cn_stride fixed at 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27721
// n = 9..15 with m = 5, k = 1, 3, 5 and a_stride fixed at 7.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27738
// n = 9..15, k = 1, 3, 5 and every m in [1, 5]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27757
// n = 16 and 24 (multiples of 8) with m = 5 and k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27773
// n = 16 and 24 with m = 5, k = 1, 3, 5 and cn_stride fixed at 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27790
// n = 16 and 24 with m = 5, k = 1, 3, 5 and a_stride fixed at 7.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27807
// n = 16 and 24, k = 1, 3, 5 and every m in [1, 5]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27826
// k = 1, 3, 5 against every m in [1, 5] and n in [1, 8], with cm_stride
// fixed at 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27846
// Single 5x8 tile at k = 1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27859
// Single 5x8 tile at k = 1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27872
// Single 5x8 tile at k = 1 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27885 #endif // XNN_ARCH_WASMRELAXEDSIMD
27886
27887
27888 #if XNN_ARCH_WASMRELAXEDSIMD
// Single full 6x8 tile at k = 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27900
// Single full 6x8 tile at k = 1 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27913
// Single full 6x8 tile at k = 1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27926
// k = 1 against every m in [1, 6] and n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27943
// k = 1, n = 8 and every m in [1, 6]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27958
// k = 1, m = 6 and every n in [1, 8]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27973
// Full 6x8 tile with k = 2..9.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27987
// Full 6x8 tile for each k in [2, 9] with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28002
// Sweeps k in [2, 9] over every n in [1, 8] and m in [1, 6], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28021
// n in [9, 15] with m=6, for k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28037
// n in [9, 15] with m=6, for k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28054
// n in [9, 15] with m=6, for k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28071
// n in [9, 15], k in {1, 3, 5}, and every m in [1, 6], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28090
// n in {16, 24} with m=6, for k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28106
// n in {16, 24} with m=6, for k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28123
// n in {16, 24} with m=6, for k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28140
// n in {16, 24}, k in {1, 3, 5}, and every m in [1, 6], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28159
// k in {1, 3, 5}, every n in [1, 8] and m in [1, 6], with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28179
// Full 6x8 tile at k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28192
// Full 6x8 tile at k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28205
// Full 6x8 tile at k=1 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28218 #endif // XNN_ARCH_WASMRELAXEDSIMD
28219
28220
28221 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 2x4 tile (m=2, n=4) at k=1 with no stride overrides.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28233
// Full 2x4 tile at k=1 with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28246
// Full 2x4 tile at k=1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28259
// Sweeps every m in [1, 2] and n in [1, 4] at k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28276
// Sweeps m in [1, 2] with n fixed at 4 and k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(1).sr(1)
      .m(m).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
28291
// Sweeps n in [1, 4] with m fixed at 2 and k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
28306
// Full 2x4 tile (m=2, n=4) for each k in [2, 9].
TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
28320
// Full 2x4 tile for each k in [2, 9] with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
28335
// Sweeps k in [2, 9] over every n in [1, 4] and m in [1, 2], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
28354
// n in [5, 7] with m=2, for k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28370
// n in [5, 7] with m=2, for k in {1, 3, 5}, with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28387
// n in [5, 7] with m=2, for k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_gt_4_strided_a) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28404
// n in [5, 7], k in {1, 3, 5}, and every m in [1, 2], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
28423
// n in {8, 12} with m=2, for k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28439
// n in {8, 12} with m=2, for k in {1, 3, 5}, with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28456
// n in {8, 12} with m=2, for k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_div_4_strided_a) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28473
// n in {8, 12}, k in {1, 3, 5}, and every m in [1, 2], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
28492
// k in {1, 3, 5}, every n in [1, 4] and m in [1, 2], with cm_stride set to 7 and iterations(1).
TEST(F32_GEMMINC_MINMAX_2X4__WASM, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
28512
// Full 2x4 tile at k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28525
// Full 2x4 tile at k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28538
// Full 2x4 tile at k=1 with cm_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__WASM, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28551 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28552
28553
28554 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x4 tile (m=4, n=4) at k=1 with no stride overrides.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28566
// Full 4x4 tile at k=1 with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28579
// Full 4x4 tile at k=1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28592
// Sweeps every m in [1, 4] and n in [1, 4] at k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28609
// Sweeps m in [1, 4] with n fixed at 4 and k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1)
      .m(m).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
28624
// Sweeps n in [1, 4] with m fixed at 4 and k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1)
      .m(4).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
28639
// Full 4x4 tile (m=4, n=4) for each k in [2, 9].
TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
28653
// Full 4x4 tile for each k in [2, 9] with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
28668
// Sweeps k in [2, 9] over every n in [1, 4] and m in [1, 4], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
28687
// n in [5, 7] with m=4, for k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28703
// n in [5, 7] with m=4, for k in {1, 3, 5}, with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28720
// n in [5, 7] with m=4, for k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_gt_4_strided_a) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28737
// n in [5, 7], k in {1, 3, 5}, and every m in [1, 4], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
28756
// n in {8, 12} with m=4, for k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28772
// n in {8, 12} with m=4, for k in {1, 3, 5}, with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28789
// n in {8, 12} with m=4, for k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_div_4_strided_a) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28806
// n in {8, 12}, k in {1, 3, 5}, and every m in [1, 4], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
28825
// k in {1, 3, 5}, every n in [1, 4] and m in [1, 4], with cm_stride set to 7 and iterations(1).
TEST(F32_GEMMINC_MINMAX_4X4__WASM, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
28845
// Full 4x4 tile at k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28858
// Full 4x4 tile at k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28871
// Full 4x4 tile at k=1 with cm_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X4__WASM, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
}
28884 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28885
28886
// Full 2x4 tile (m=2, n=4) at k=1 with no stride overrides.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
28898
// Full 2x4 tile at k=1 with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
28911
// Full 2x4 tile at k=1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
28924
// Sweeps every m in [1, 2] and n in [1, 4] at k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
28941
// Sweeps m in [1, 2] with n fixed at 4 and k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(1).sr(1)
      .m(m).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
28956
// Sweeps n in [1, 4] with m fixed at 2 and k=1, with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
28971
// Full 2x4 tile (m=2, n=4) for each k in [2, 9].
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
28985
// Full 2x4 tile for each k in [2, 9] with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
29000
// Sweeps k in [2, 9] over every n in [1, 4] and m in [1, 2], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
29019
// n in [5, 7] with m=2, for k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
29035
// n in [5, 7] with m=2, for k in {1, 3, 5}, with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
29052
// n in [5, 7] with m=2, for k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
29069
// n in [5, 7], k in {1, 3, 5}, and every m in [1, 2], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
29088
// n in {8, 12} with m=2, for k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
29104
// n in {8, 12} with m=2, for k in {1, 3, 5}, with cn_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
29121
// n in {8, 12} with m=2, for k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
29138
// n in {8, 12}, k in {1, 3, 5}, and every m in [1, 2], with iterations(1) per shape.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
29157
// k in {1, 3, 5}, every n in [1, 4] and m in [1, 2], with cm_stride set to 7 and iterations(1).
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
29177
// Full 2x4 tile at k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
29190
// Full 2x4 tile at k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
29203
// Full 2x4 tile at k=1 with cm_stride set to 7.
TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
29216