Lines Matching full:benchmark
21 #include <benchmark/benchmark.h>
41 static void GEMMBenchmark(benchmark::State& state, in GEMMBenchmark()
45 benchmark::utils::IsaCheckFunction isa_check = nullptr) in GEMMBenchmark()
59 const size_t nc_stride = benchmark::utils::RoundUp(nc, nr); in GEMMBenchmark()
60 const size_t kc_stride = benchmark::utils::RoundUp(kc, kr * sr); in GEMMBenchmark()
77 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), in GEMMBenchmark()
97 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(uint8_t)); in GEMMBenchmark()
115 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in GEMMBenchmark()
120 state.counters["OPS"] = benchmark::Counter( in GEMMBenchmark()
121 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in GEMMBenchmark()
158 static void GemmlowpBenchmark(benchmark::State& state, uint32_t threads) in GemmlowpBenchmark()
176 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), in GemmlowpBenchmark()
192 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(uint8_t)); in GemmlowpBenchmark()
204 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in GemmlowpBenchmark()
209 state.counters["OPS"] = benchmark::Counter( in GemmlowpBenchmark()
210 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in GemmlowpBenchmark()
213 static void gemmlowp_st(benchmark::State& state, const char* net) in gemmlowp_st()
221 static void RuyBenchmark(benchmark::State& state, size_t threads) in RuyBenchmark()
233 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), in RuyBenchmark()
245 // Note: context must be static to avoid the cost of re-creating it for each benchmark. in RuyBenchmark()
265 …// Thus, on the first benchmark, we compute GEMM for 500 milliseconds (to be safe) without recordi… in RuyBenchmark()
287 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(uint8_t)); in RuyBenchmark()
298 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in RuyBenchmark()
303 state.counters["OPS"] = benchmark::Counter( in RuyBenchmark()
304 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in RuyBenchmark()
307 static void ruy_st(benchmark::State& state, const char* net) in ruy_st()
315 …static void qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53(benchmark::State& state, const char* n… in qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53()
319 4, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53()
321 …static void qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, const ch… in qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53()
324 4, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53()
326 …static void qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7(benchmark::State& state, const char* ne… in qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7()
330 4, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7()
332 …static void qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7(benchmark::State& state, const cha… in qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7()
335 4, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7()
337 static void qu8_gemm_4x8__aarch32_neon_mlal_lane_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x8__aarch32_neon_mlal_lane_ld64()
341 4, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_4x8__aarch32_neon_mlal_lane_ld64()
343 …static void qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* ne… in qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64()
346 4, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64()
348 …static void qu8_gemm_1x8__aarch32_neon_mlal_lane_cortex_a7(benchmark::State& state, const char* ne… in qu8_gemm_1x8__aarch32_neon_mlal_lane_cortex_a7()
352 1, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_1x8__aarch32_neon_mlal_lane_cortex_a7()
354 …static void qu8_gemm_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7(benchmark::State& state, const cha… in qu8_gemm_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7()
357 1, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7()
371 …static void qu8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
375 4, 16, 4, 1, benchmark::utils::CheckNEONDOT); in BENCHMARK_GEMM()
377 static void qu8_gemm_4x16c4__aarch64_neondot_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x16c4__aarch64_neondot_ld128()
381 4, 16, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_4x16c4__aarch64_neondot_ld128()
383 static void qu8_gemm_4x8c4__aarch64_neondot_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x8c4__aarch64_neondot_ld128()
388 benchmark::utils::CheckNEONDOT); in qu8_gemm_4x8c4__aarch64_neondot_ld128()
390 static void qu8_gemm_4x8c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) { in qu8_gemm_4x8c4__aarch64_neondot_cortex_a55()
394 4, 8, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_4x8c4__aarch64_neondot_cortex_a55()
396 …static void qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, const char* … in qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53()
401 benchmark::utils::CheckNEON); in qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53()
403 …static void qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, const c… in qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53()
408 benchmark::utils::CheckNEON); in qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53()
410 static void qu8_gemm_4x16__aarch64_neon_mlal_lane_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x16__aarch64_neon_mlal_lane_ld64()
415 benchmark::utils::CheckNEON); in qu8_gemm_4x16__aarch64_neon_mlal_lane_ld64()
417 …static void qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* n… in qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64()
422 benchmark::utils::CheckNEON); in qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64()
424 …static void qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a75(benchmark::State& state, const char* … in qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a75()
429 benchmark::utils::CheckNEON); in qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a75()
431 …static void qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75(benchmark::State& state, const c… in qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75()
436 benchmark::utils::CheckNEON); in qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75()
452 static void qu8_gemm_1x8c4__neondot(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
456 1, 8, 4, 1, benchmark::utils::CheckNEONDOT); in BENCHMARK_GEMM()
458 static void qu8_gemm_2x8c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_2x8c4__neondot()
462 2, 8, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_2x8c4__neondot()
464 static void qu8_gemm_3x8c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_3x8c4__neondot()
468 3, 8, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_3x8c4__neondot()
470 static void qu8_gemm_4x8c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_4x8c4__neondot()
474 4, 8, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_4x8c4__neondot()
476 static void qu8_gemm_5x8c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_5x8c4__neondot()
480 5, 8, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_5x8c4__neondot()
482 static void qu8_gemm_6x8c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_6x8c4__neondot()
486 6, 8, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_6x8c4__neondot()
488 static void qu8_gemm_8x8c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_8x8c4__neondot()
492 8, 8, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_8x8c4__neondot()
494 static void qu8_gemm_1x16c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_1x16c4__neondot()
498 1, 16, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_1x16c4__neondot()
500 static void qu8_gemm_2x16c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_2x16c4__neondot()
504 2, 16, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_2x16c4__neondot()
506 static void qu8_gemm_3x16c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_3x16c4__neondot()
510 3, 16, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_3x16c4__neondot()
512 static void qu8_gemm_4x16c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_4x16c4__neondot()
516 4, 16, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_4x16c4__neondot()
518 static void qu8_gemm_5x16c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_5x16c4__neondot()
522 5, 16, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_5x16c4__neondot()
524 static void qu8_gemm_6x16c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_6x16c4__neondot()
528 6, 16, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_6x16c4__neondot()
530 static void qu8_gemm_8x16c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_8x16c4__neondot()
534 8, 16, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_8x16c4__neondot()
536 static void qu8_gemm_1x32c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_1x32c4__neondot()
540 1, 32, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_1x32c4__neondot()
542 static void qu8_gemm_2x32c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_2x32c4__neondot()
546 2, 32, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_2x32c4__neondot()
548 static void qu8_gemm_3x32c4__neondot(benchmark::State& state, const char* net) { in qu8_gemm_3x32c4__neondot()
552 3, 32, 4, 1, benchmark::utils::CheckNEONDOT); in qu8_gemm_3x32c4__neondot()
576 static void qu8_gemm_1x8__neon_mlal_lane(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
580 1, 8, 1, 1, benchmark::utils::CheckNEON); in BENCHMARK_GEMM()
582 static void qu8_gemm_2x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_2x8__neon_mlal_lane()
586 2, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_2x8__neon_mlal_lane()
588 static void qu8_gemm_3x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_3x8__neon_mlal_lane()
592 3, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_3x8__neon_mlal_lane()
594 static void qu8_gemm_4x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_4x8__neon_mlal_lane()
598 4, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_4x8__neon_mlal_lane()
600 static void qu8_gemm_6x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_6x8__neon_mlal_lane()
604 6, 8, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_6x8__neon_mlal_lane()
606 static void qu8_gemm_1x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_1x16__neon_mlal_lane()
610 1, 16, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_1x16__neon_mlal_lane()
612 static void qu8_gemm_2x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_2x16__neon_mlal_lane()
616 2, 16, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_2x16__neon_mlal_lane()
618 static void qu8_gemm_3x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_3x16__neon_mlal_lane()
622 3, 16, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_3x16__neon_mlal_lane()
624 static void qu8_gemm_4x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_4x16__neon_mlal_lane()
628 4, 16, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_4x16__neon_mlal_lane()
630 static void qu8_gemm_6x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qu8_gemm_6x16__neon_mlal_lane()
634 6, 16, 1, 1, benchmark::utils::CheckNEON); in qu8_gemm_6x16__neon_mlal_lane()
651 static void qu8_gemm_1x1c4__armsimd32(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
655 1, 1, 4, 1, benchmark::utils::CheckARMV6); in BENCHMARK_GEMM()
657 static void qu8_gemm_2x1c4__armsimd32(benchmark::State& state, const char* net) { in qu8_gemm_2x1c4__armsimd32()
661 2, 1, 4, 1, benchmark::utils::CheckARMV6); in qu8_gemm_2x1c4__armsimd32()
663 static void qu8_gemm_1x2c4__armsimd32(benchmark::State& state, const char* net) { in qu8_gemm_1x2c4__armsimd32()
667 1, 2, 4, 1, benchmark::utils::CheckARMV6); in qu8_gemm_1x2c4__armsimd32()
669 static void qu8_gemm_2x2c4__armsimd32(benchmark::State& state, const char* net) { in qu8_gemm_2x2c4__armsimd32()
673 2, 2, 4, 1, benchmark::utils::CheckARMV6); in qu8_gemm_2x2c4__armsimd32()
684 static void qu8_gemm_1x16c8__avx512skx(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
689 benchmark::utils::CheckAVX512SKX); in BENCHMARK_GEMM()
691 static void qu8_gemm_2x16c8__avx512skx(benchmark::State& state, const char* net) { in qu8_gemm_2x16c8__avx512skx()
696 benchmark::utils::CheckAVX512SKX); in qu8_gemm_2x16c8__avx512skx()
698 static void qu8_gemm_3x16c8__avx512skx(benchmark::State& state, const char* net) { in qu8_gemm_3x16c8__avx512skx()
703 benchmark::utils::CheckAVX512SKX); in qu8_gemm_3x16c8__avx512skx()
705 static void qu8_gemm_4x16c8__avx512skx(benchmark::State& state, const char* net) { in qu8_gemm_4x16c8__avx512skx()
710 benchmark::utils::CheckAVX512SKX); in qu8_gemm_4x16c8__avx512skx()
712 static void qu8_gemm_1x8c8__avx2(benchmark::State& state, const char* net) { in qu8_gemm_1x8c8__avx2()
717 benchmark::utils::CheckAVX2); in qu8_gemm_1x8c8__avx2()
719 static void qu8_gemm_2x8c8__avx2(benchmark::State& state, const char* net) { in qu8_gemm_2x8c8__avx2()
724 benchmark::utils::CheckAVX2); in qu8_gemm_2x8c8__avx2()
726 static void qu8_gemm_3x8c8__avx2(benchmark::State& state, const char* net) { in qu8_gemm_3x8c8__avx2()
731 benchmark::utils::CheckAVX2); in qu8_gemm_3x8c8__avx2()
733 static void qu8_gemm_1x4c2__xop_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__xop_ld64()
738 benchmark::utils::CheckXOP); in qu8_gemm_1x4c2__xop_ld64()
740 static void qu8_gemm_2x4c2__xop_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__xop_ld64()
745 benchmark::utils::CheckXOP); in qu8_gemm_2x4c2__xop_ld64()
747 static void qu8_gemm_3x4c2__xop_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__xop_ld64()
752 benchmark::utils::CheckXOP); in qu8_gemm_3x4c2__xop_ld64()
754 static void qu8_gemm_4x4c2__xop_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__xop_ld64()
759 benchmark::utils::CheckXOP); in qu8_gemm_4x4c2__xop_ld64()
761 static void qu8_gemm_1x4c2__xop_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__xop_ld128()
766 benchmark::utils::CheckXOP); in qu8_gemm_1x4c2__xop_ld128()
768 static void qu8_gemm_2x4c2__xop_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__xop_ld128()
773 benchmark::utils::CheckXOP); in qu8_gemm_2x4c2__xop_ld128()
775 static void qu8_gemm_3x4c2__xop_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__xop_ld128()
780 benchmark::utils::CheckXOP); in qu8_gemm_3x4c2__xop_ld128()
782 static void qu8_gemm_4x4c2__xop_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__xop_ld128()
787 benchmark::utils::CheckXOP); in qu8_gemm_4x4c2__xop_ld128()
789 static void qu8_gemm_1x4c8__xop_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__xop_ld64()
794 benchmark::utils::CheckXOP); in qu8_gemm_1x4c8__xop_ld64()
796 static void qu8_gemm_2x4c8__xop_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__xop_ld64()
801 benchmark::utils::CheckXOP); in qu8_gemm_2x4c8__xop_ld64()
803 static void qu8_gemm_3x4c8__xop_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__xop_ld64()
808 benchmark::utils::CheckXOP); in qu8_gemm_3x4c8__xop_ld64()
810 static void qu8_gemm_1x4c8__xop_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__xop_ld128()
815 benchmark::utils::CheckXOP); in qu8_gemm_1x4c8__xop_ld128()
817 static void qu8_gemm_2x4c8__xop_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__xop_ld128()
822 benchmark::utils::CheckXOP); in qu8_gemm_2x4c8__xop_ld128()
824 static void qu8_gemm_3x4c8__xop_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__xop_ld128()
829 benchmark::utils::CheckXOP); in qu8_gemm_3x4c8__xop_ld128()
831 static void qu8_gemm_1x4c2__avx_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__avx_ld64()
836 benchmark::utils::CheckAVX); in qu8_gemm_1x4c2__avx_ld64()
838 static void qu8_gemm_2x4c2__avx_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__avx_ld64()
843 benchmark::utils::CheckAVX); in qu8_gemm_2x4c2__avx_ld64()
845 static void qu8_gemm_3x4c2__avx_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__avx_ld64()
850 benchmark::utils::CheckAVX); in qu8_gemm_3x4c2__avx_ld64()
852 static void qu8_gemm_4x4c2__avx_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__avx_ld64()
857 benchmark::utils::CheckAVX); in qu8_gemm_4x4c2__avx_ld64()
859 static void qu8_gemm_1x4c2__avx_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__avx_ld128()
864 benchmark::utils::CheckAVX); in qu8_gemm_1x4c2__avx_ld128()
866 static void qu8_gemm_2x4c2__avx_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__avx_ld128()
871 benchmark::utils::CheckAVX); in qu8_gemm_2x4c2__avx_ld128()
873 static void qu8_gemm_3x4c2__avx_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__avx_ld128()
878 benchmark::utils::CheckAVX); in qu8_gemm_3x4c2__avx_ld128()
880 static void qu8_gemm_4x4c2__avx_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__avx_ld128()
885 benchmark::utils::CheckAVX); in qu8_gemm_4x4c2__avx_ld128()
887 static void qu8_gemm_1x4c8__avx_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__avx_ld64()
892 benchmark::utils::CheckAVX); in qu8_gemm_1x4c8__avx_ld64()
894 static void qu8_gemm_2x4c8__avx_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__avx_ld64()
899 benchmark::utils::CheckAVX); in qu8_gemm_2x4c8__avx_ld64()
901 static void qu8_gemm_3x4c8__avx_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__avx_ld64()
906 benchmark::utils::CheckAVX); in qu8_gemm_3x4c8__avx_ld64()
908 static void qu8_gemm_1x4c8__avx_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__avx_ld128()
913 benchmark::utils::CheckAVX); in qu8_gemm_1x4c8__avx_ld128()
915 static void qu8_gemm_2x4c8__avx_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__avx_ld128()
920 benchmark::utils::CheckAVX); in qu8_gemm_2x4c8__avx_ld128()
922 static void qu8_gemm_3x4c8__avx_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__avx_ld128()
927 benchmark::utils::CheckAVX); in qu8_gemm_3x4c8__avx_ld128()
929 static void qu8_gemm_1x4c2__sse41_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__sse41_ld64()
934 benchmark::utils::CheckSSE41); in qu8_gemm_1x4c2__sse41_ld64()
936 static void qu8_gemm_2x4c2__sse41_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__sse41_ld64()
941 benchmark::utils::CheckSSE41); in qu8_gemm_2x4c2__sse41_ld64()
943 static void qu8_gemm_3x4c2__sse41_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__sse41_ld64()
948 benchmark::utils::CheckSSE41); in qu8_gemm_3x4c2__sse41_ld64()
950 static void qu8_gemm_4x4c2__sse41_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__sse41_ld64()
955 benchmark::utils::CheckSSE41); in qu8_gemm_4x4c2__sse41_ld64()
957 static void qu8_gemm_1x4c2__sse41_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__sse41_ld128()
962 benchmark::utils::CheckSSE41); in qu8_gemm_1x4c2__sse41_ld128()
964 static void qu8_gemm_2x4c2__sse41_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__sse41_ld128()
969 benchmark::utils::CheckSSE41); in qu8_gemm_2x4c2__sse41_ld128()
971 static void qu8_gemm_3x4c2__sse41_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__sse41_ld128()
976 benchmark::utils::CheckSSE41); in qu8_gemm_3x4c2__sse41_ld128()
978 static void qu8_gemm_4x4c2__sse41_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__sse41_ld128()
983 benchmark::utils::CheckSSE41); in qu8_gemm_4x4c2__sse41_ld128()
985 static void qu8_gemm_1x4c8__sse41_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__sse41_ld64()
990 benchmark::utils::CheckSSE41); in qu8_gemm_1x4c8__sse41_ld64()
992 static void qu8_gemm_2x4c8__sse41_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__sse41_ld64()
997 benchmark::utils::CheckSSE41); in qu8_gemm_2x4c8__sse41_ld64()
999 static void qu8_gemm_3x4c8__sse41_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__sse41_ld64()
1004 benchmark::utils::CheckSSE41); in qu8_gemm_3x4c8__sse41_ld64()
1006 static void qu8_gemm_1x4c8__sse41_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__sse41_ld128()
1011 benchmark::utils::CheckSSE41); in qu8_gemm_1x4c8__sse41_ld128()
1013 static void qu8_gemm_2x4c8__sse41_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__sse41_ld128()
1018 benchmark::utils::CheckSSE41); in qu8_gemm_2x4c8__sse41_ld128()
1020 static void qu8_gemm_3x4c8__sse41_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__sse41_ld128()
1025 benchmark::utils::CheckSSE41); in qu8_gemm_3x4c8__sse41_ld128()
1027 static void qu8_gemm_1x4c2__sse2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__sse2_ld64()
1033 static void qu8_gemm_2x4c2__sse2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__sse2_ld64()
1039 static void qu8_gemm_3x4c2__sse2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__sse2_ld64()
1045 static void qu8_gemm_4x4c2__sse2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__sse2_ld64()
1051 static void qu8_gemm_1x4c2__sse2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__sse2_ld128()
1057 static void qu8_gemm_2x4c2__sse2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__sse2_ld128()
1063 static void qu8_gemm_3x4c2__sse2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__sse2_ld128()
1069 static void qu8_gemm_4x4c2__sse2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__sse2_ld128()
1075 static void qu8_gemm_1x4c8__sse2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__sse2_ld64()
1081 static void qu8_gemm_2x4c8__sse2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__sse2_ld64()
1087 static void qu8_gemm_3x4c8__sse2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__sse2_ld64()
1093 static void qu8_gemm_1x4c8__sse2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__sse2_ld128()
1099 static void qu8_gemm_2x4c8__sse2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__sse2_ld128()
1105 static void qu8_gemm_3x4c8__sse2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__sse2_ld128()
1184 static void qu8_gemm_1x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
1190 static void qu8_gemm_2x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__wasmsimd_dot16x2_ld64()
1196 static void qu8_gemm_3x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__wasmsimd_dot16x2_ld64()
1202 static void qu8_gemm_4x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__wasmsimd_dot16x2_ld64()
1209 static void qu8_gemm_1x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2__wasmsimd_dot16x2_ld128()
1215 static void qu8_gemm_2x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2__wasmsimd_dot16x2_ld128()
1221 static void qu8_gemm_3x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2__wasmsimd_dot16x2_ld128()
1227 static void qu8_gemm_4x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2__wasmsimd_dot16x2_ld128()
1234 static void qu8_gemm_1x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2s4__wasmsimd_dot16x2_ld64()
1240 static void qu8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64()
1246 static void qu8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64()
1252 static void qu8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64()
1259 static void qu8_gemm_1x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c2s4__wasmsimd_dot16x2_ld128()
1265 static void qu8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128()
1271 static void qu8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128()
1277 static void qu8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128()
1284 static void qu8_gemm_1x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__wasmsimd_dot16x2_ld64()
1290 static void qu8_gemm_2x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__wasmsimd_dot16x2_ld64()
1296 static void qu8_gemm_3x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__wasmsimd_dot16x2_ld64()
1302 static void qu8_gemm_4x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qu8_gemm_4x4c8__wasmsimd_dot16x2_ld64()
1309 static void qu8_gemm_1x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_1x4c8__wasmsimd_dot16x2_ld128()
1315 static void qu8_gemm_2x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_2x4c8__wasmsimd_dot16x2_ld128()
1321 static void qu8_gemm_3x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_3x4c8__wasmsimd_dot16x2_ld128()
1327 static void qu8_gemm_4x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qu8_gemm_4x4c8__wasmsimd_dot16x2_ld128()
1364 static void qu8_gemm_1x2__wasm_fmagic(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
1370 static void qu8_gemm_2x2__wasm_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_2x2__wasm_fmagic()
1376 static void qu8_gemm_3x2__wasm_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_3x2__wasm_fmagic()
1382 static void qu8_gemm_4x2__wasm_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_4x2__wasm_fmagic()
1388 static void qu8_gemm_1x4__wasm_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_1x4__wasm_fmagic()
1394 static void qu8_gemm_2x4__wasm_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_2x4__wasm_fmagic()
1400 static void qu8_gemm_3x4__wasm_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_3x4__wasm_fmagic()
1406 static void qu8_gemm_4x4__wasm_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_4x4__wasm_fmagic()
1424 static void qu8_gemm_1x2__scalar_fmagic(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
1430 static void qu8_gemm_2x2__scalar_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_2x2__scalar_fmagic()
1436 static void qu8_gemm_3x2__scalar_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_3x2__scalar_fmagic()
1442 static void qu8_gemm_4x2__scalar_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_4x2__scalar_fmagic()
1448 static void qu8_gemm_1x4__scalar_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_1x4__scalar_fmagic()
1454 static void qu8_gemm_2x4__scalar_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_2x4__scalar_fmagic()
1460 static void qu8_gemm_3x4__scalar_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_3x4__scalar_fmagic()
1466 static void qu8_gemm_4x4__scalar_fmagic(benchmark::State& state, const char* net) { in qu8_gemm_4x4__scalar_fmagic()
1473 static void qu8_gemm_1x2__scalar_imagic(benchmark::State& state, const char* net) { in qu8_gemm_1x2__scalar_imagic()
1479 static void qu8_gemm_2x2__scalar_imagic(benchmark::State& state, const char* net) { in qu8_gemm_2x2__scalar_imagic()
1485 static void qu8_gemm_3x2__scalar_imagic(benchmark::State& state, const char* net) { in qu8_gemm_3x2__scalar_imagic()
1491 static void qu8_gemm_4x2__scalar_imagic(benchmark::State& state, const char* net) { in qu8_gemm_4x2__scalar_imagic()
1497 static void qu8_gemm_1x4__scalar_imagic(benchmark::State& state, const char* net) { in qu8_gemm_1x4__scalar_imagic()
1503 static void qu8_gemm_2x4__scalar_imagic(benchmark::State& state, const char* net) { in qu8_gemm_2x4__scalar_imagic()
1509 static void qu8_gemm_3x4__scalar_imagic(benchmark::State& state, const char* net) { in qu8_gemm_3x4__scalar_imagic()
1515 static void qu8_gemm_4x4__scalar_imagic(benchmark::State& state, const char* net) { in qu8_gemm_4x4__scalar_imagic()
1522 static void qu8_gemm_1x2__scalar_lrintf(benchmark::State& state, const char* net) { in qu8_gemm_1x2__scalar_lrintf()
1528 static void qu8_gemm_2x2__scalar_lrintf(benchmark::State& state, const char* net) { in qu8_gemm_2x2__scalar_lrintf()
1534 static void qu8_gemm_3x2__scalar_lrintf(benchmark::State& state, const char* net) { in qu8_gemm_3x2__scalar_lrintf()
1540 static void qu8_gemm_4x2__scalar_lrintf(benchmark::State& state, const char* net) { in qu8_gemm_4x2__scalar_lrintf()
1546 static void qu8_gemm_1x4__scalar_lrintf(benchmark::State& state, const char* net) { in qu8_gemm_1x4__scalar_lrintf()
1552 static void qu8_gemm_2x4__scalar_lrintf(benchmark::State& state, const char* net) { in qu8_gemm_2x4__scalar_lrintf()
1558 static void qu8_gemm_3x4__scalar_lrintf(benchmark::State& state, const char* net) { in qu8_gemm_3x4__scalar_lrintf()
1564 static void qu8_gemm_4x4__scalar_lrintf(benchmark::State& state, const char* net) { in qu8_gemm_4x4__scalar_lrintf()