Lines Matching +full:k +full:- +full:block

3 # This source code is licensed under the BSD-style license found in the
7 - name: xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64
9 k-block: 2
11 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7
13 k-block: 2
15 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53
17 k-block: 4
20 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55
22 k-block: 4
25 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75
27 k-block: 4
30 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64
32 k-block: 2
34 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53
36 k-block: 4
39 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75
41 k-block: 4
45 - name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53
47 k-block: 8
50 - name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75
52 k-block: 8
55 - name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64
57 k-block: 2
59 - name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53
61 k-block: 8
64 - name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75
66 k-block: 8
69 - name: xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53
71 k-block: 4
74 - name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75
76 k-block: 8
78 - name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_ld64
80 k-block: 2
82 - name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75
84 k-block: 8
86 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53
88 k-block: 4
91 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55
93 k-block: 4
96 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75
98 k-block: 8
101 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64
103 k-block: 2
105 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128
107 k-block: 4
109 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53
111 k-block: 4
114 - name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75
116 k-block: 8
119 - name: xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53
121 k-block: 4
124 - name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75
126 k-block: 8
129 - name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75
131 k-block: 8
134 - name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53
136 k-block: 4
139 - name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55
141 k-block: 4
144 - name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73
146 k-block: 8
149 - name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75
151 k-block: 8
154 - name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64
156 k-block: 2
158 - name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128
160 k-block: 4
162 - name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53
164 k-block: 4
167 - name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75
169 k-block: 8
173 - name: xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64
175 k-block: 2
176 - name: xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64
178 k-block: 2
179 - name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64
181 k-block: 2
182 - name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64
184 k-block: 2
186 - aarch64
187 - name: xnn_f32_gemm_minmax_ukernel_1x8s4__neon
189 k-block: 4
190 - name: xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma
192 k-block: 4
193 - name: xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64
195 k-block: 2
196 - name: xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64
198 k-block: 2
200 - aarch64
201 - name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld64
203 k-block: 2
204 - name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128
206 k-block: 4
207 - name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64
209 k-block: 2
210 - name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128
212 k-block: 4
213 - name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64
215 k-block: 2
216 - name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128
218 k-block: 4
219 - name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64
221 k-block: 2
223 - aarch64
224 - name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128
226 k-block: 4
228 - aarch64
229 - name: xnn_f32_gemm_minmax_ukernel_4x8s4__neon
231 k-block: 4
232 - name: xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma
234 k-block: 4
235 - name: xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64
237 k-block: 2
238 - name: xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64
240 k-block: 2
242 - aarch64
243 - name: xnn_f32_gemm_minmax_ukernel_6x2__neon_lane_ld64
245 k-block: 2
246 - name: xnn_f32_gemm_minmax_ukernel_6x2__neonfma_lane_ld64
248 k-block: 2
250 - aarch64
251 - name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64
253 k-block: 2
254 - name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128
256 k-block: 4
257 - name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64
259 k-block: 2
260 - name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128
262 k-block: 4
263 - name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64
265 k-block: 2
266 - name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128
268 k-block: 4
269 - name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64
271 k-block: 2
273 - aarch64
274 - name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128
276 k-block: 4
278 - aarch64
279 - name: xnn_f32_gemm_minmax_ukernel_6x8s4__neon
281 k-block: 4
282 - name: xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma
284 k-block: 4
285 - name: xnn_f32_gemm_minmax_ukernel_8x8s4__neon
287 k-block: 4
288 - name: xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma
290 k-block: 4
292 - name: xnn_f32_gemm_minmax_ukernel_1x8__sse_dup
294 k-block: 4
295 - name: xnn_f32_gemm_minmax_ukernel_1x8__sse_load1
297 k-block: 1
298 - name: xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup
300 k-block: 4
301 - name: xnn_f32_gemm_minmax_ukernel_1x8s4__sse
303 k-block: 4
304 - name: xnn_f32_gemm_minmax_ukernel_3x8__sse_dup
306 k-block: 4
307 - name: xnn_f32_gemm_minmax_ukernel_3x8__sse_load1
309 k-block: 1
310 - name: xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup
312 k-block: 4
313 - name: xnn_f32_gemm_minmax_ukernel_3x8s4__sse
315 k-block: 4
316 - name: xnn_f32_gemm_minmax_ukernel_4x2c4__sse
318 k-block: 4
319 - name: xnn_f32_gemm_minmax_ukernel_4x8__sse_dup
321 k-block: 4
322 - name: xnn_f32_gemm_minmax_ukernel_4x8__sse_load1
324 k-block: 1
325 - name: xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup
327 k-block: 4
328 - name: xnn_f32_gemm_minmax_ukernel_4x8s4__sse
330 k-block: 4
331 - name: xnn_f32_gemm_minmax_ukernel_5x8__sse_dup
333 k-block: 4
334 - name: xnn_f32_gemm_minmax_ukernel_5x8__sse_load1
336 k-block: 1
337 - name: xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup
339 k-block: 4
340 - name: xnn_f32_gemm_minmax_ukernel_5x8s4__sse
342 k-block: 4
344 - name: xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast
346 k-block: 1
347 - name: xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast
349 k-block: 1
350 - name: xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast
352 k-block: 1
353 - name: xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast
355 k-block: 1
356 - name: xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast
358 k-block: 1
359 - name: xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast
361 k-block: 1
362 - name: xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast
364 k-block: 1
365 - name: xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast
367 k-block: 1
368 - name: xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast
370 k-block: 1
372 - name: xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast
374 k-block: 1
375 - name: xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast
377 k-block: 1
378 - name: xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast
380 k-block: 4
381 - name: xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast
383 k-block: 1
384 - name: xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast
386 k-block: 4
387 - name: xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast
389 k-block: 1
390 - name: xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast
392 k-block: 1
393 - name: xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast
395 k-block: 4
396 - name: xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast
398 k-block: 1
399 - name: xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast
401 k-block: 1
402 - name: xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast
404 k-block: 4
405 - name: xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast
407 k-block: 1
408 - name: xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast
410 k-block: 1
411 - name: xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast
413 k-block: 1
415 - name: xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast
417 k-block: 1
418 - name: xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast
420 k-block: 1
421 - name: xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast
423 k-block: 1
424 - name: xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast
426 k-block: 1
427 - name: xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast
429 k-block: 1
430 - name: xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast
432 k-block: 1
434 - name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat
436 k-block: 1
437 - name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat
439 k-block: 4
440 - name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat
442 k-block: 1
443 - name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat
445 k-block: 4
446 - name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm
448 k-block: 4
449 - name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86
451 k-block: 4
452 - name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat
454 k-block: 1
455 - name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat
457 k-block: 4
458 - name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat
460 k-block: 1
461 - name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat
463 k-block: 4
464 - name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm
466 k-block: 4
467 - name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86
469 k-block: 4
470 - name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm
472 k-block: 4
473 - name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86
475 k-block: 4
476 - name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat
478 k-block: 1
479 - name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat
481 k-block: 4
482 - name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat
484 k-block: 1
485 - name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat
487 k-block: 4
488 - name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm
490 k-block: 4
491 - name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86
493 k-block: 4
494 - name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat
496 k-block: 1
497 - name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat
499 k-block: 4
500 - name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat
502 k-block: 1
503 - name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat
505 k-block: 4
506 - name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm
508 k-block: 4
509 - name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86
511 k-block: 4
512 - name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat
514 k-block: 1
515 - name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat
517 k-block: 4
518 - name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat
520 k-block: 1
521 - name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat
523 k-block: 4
524 - name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm
526 k-block: 4
527 - name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86
529 k-block: 4
531 - name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat
533 k-block: 1
534 - name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat
536 k-block: 4
537 - name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat
539 k-block: 1
540 - name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat
542 k-block: 4
543 - name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd
545 k-block: 4
546 - name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma
548 k-block: 4
549 - name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat
551 k-block: 1
552 - name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat
554 k-block: 4
555 - name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat
557 k-block: 1
558 - name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat
560 k-block: 4
561 - name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd
563 k-block: 4
564 - name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma
566 k-block: 4
567 - name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd
569 k-block: 4
570 - name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma
572 k-block: 4
573 - name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat
575 k-block: 1
576 - name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat
578 k-block: 4
579 - name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat
581 k-block: 1
582 - name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat
584 k-block: 4
585 - name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd
587 k-block: 4
588 - name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma
590 k-block: 4
591 - name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat
593 k-block: 1
594 - name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat
596 k-block: 4
597 - name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat
599 k-block: 1
600 - name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat
602 k-block: 4
603 - name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd
605 k-block: 4
606 - name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma
608 k-block: 4
609 - name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat
611 k-block: 1
612 - name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat
614 k-block: 4
615 - name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat
617 k-block: 1
618 - name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat
620 k-block: 4
621 - name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd
623 k-block: 4
624 - name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma
626 k-block: 4
628 - name: xnn_f32_gemm_minmax_ukernel_1x4__wasm
630 k-block: 1
631 - name: xnn_f32_gemm_minmax_ukernel_2x4__wasm
633 k-block: 1
634 - name: xnn_f32_gemm_minmax_ukernel_4x2__wasm
636 k-block: 1
637 - name: xnn_f32_gemm_minmax_ukernel_4x4__wasm
639 k-block: 1
641 - name: xnn_f32_gemm_minmax_ukernel_1x4__scalar
643 k-block: 1
644 - name: xnn_f32_gemm_minmax_ukernel_2x4__scalar
646 k-block: 1
647 - name: xnn_f32_gemm_minmax_ukernel_4x2__scalar
649 k-block: 1
650 - name: xnn_f32_gemm_minmax_ukernel_4x4__scalar
652 k-block: 1
654 - name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7
656 k-block: 2
658 - name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a53
660 k-block: 4
663 - name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55
665 k-block: 4
667 - name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75
669 k-block: 4
672 - name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_ld64
674 k-block: 2
676 - name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75
678 k-block: 4
682 - name: xnn_generate_f32_gemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75
684 k-block: 8
686 - name: xnn_generate_f32_gemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75
688 k-block: 8
690 - name: xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75
692 k-block: 8
694 - name: xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75
696 k-block: 8
698 - name: xnn_generate_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75
700 k-block: 8
702 - name: xnn_generate_f32_gemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75
704 k-block: 8
706 - name: xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_ld128
708 k-block: 4