Lines Matching full:state
35 static void GEMMBenchmark(benchmark::State& state, in GEMMBenchmark() argument
43 state.SkipWithError("cpuinfo initialization failed"); in GEMMBenchmark()
46 if (isa_check && !isa_check(state)) { in GEMMBenchmark()
50 const size_t mc = state.range(0); in GEMMBenchmark()
51 const size_t nc = state.range(1); in GEMMBenchmark()
52 const size_t kc = state.range(2); in GEMMBenchmark()
91 for (auto _ : state) { in GEMMBenchmark()
92 // Use circular buffers (exceeding cache size) and prefetch to control cache state: in GEMMBenchmark()
96 state.PauseTiming(); in GEMMBenchmark()
99 state.ResumeTiming(); in GEMMBenchmark()
117 state.counters["cpufreq"] = cpu_frequency; in GEMMBenchmark()
120 state.counters["OPS"] = benchmark::Counter( in GEMMBenchmark()
121 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in GEMMBenchmark()
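The matches above (lines 35-121 of the file) outline the shared GEMMBenchmark harness: problem sizes come from state.range(0..2), cpuinfo initialization and an optional isa_check gate the run, per-iteration setup happens between PauseTiming/ResumeTiming, and "cpufreq" and "OPS" counters are recorded, with OPS counting 2*mc*nc*kc multiply-adds per iteration. A minimal standalone sketch of that structure follows; the GemmFn type and naive_gemm kernel are placeholders for illustration, not XNNPACK's ukernel signature, and the cpuinfo/ISA checks and circular-buffer rotation are only noted in comments.

#include <benchmark/benchmark.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

// Placeholder kernel signature standing in for a qs8 GEMM ukernel.
using GemmFn = void (*)(size_t mc, size_t nc, size_t kc,
                        const int8_t* a, const int8_t* b, int32_t* c);

// Reference triple loop used as the benchmarked "kernel" in this sketch.
static void naive_gemm(size_t mc, size_t nc, size_t kc,
                       const int8_t* a, const int8_t* b, int32_t* c) {
  for (size_t m = 0; m < mc; m++) {
    for (size_t n = 0; n < nc; n++) {
      int32_t acc = 0;
      for (size_t k = 0; k < kc; k++) {
        acc += int32_t(a[m * kc + k]) * int32_t(b[n * kc + k]);
      }
      c[m * nc + n] = acc;
    }
  }
}

static void GEMMBenchmarkSketch(benchmark::State& state, GemmFn gemm) {
  // The real harness first checks cpuinfo initialization and the required
  // ISA here and calls state.SkipWithError(...) on failure.
  const size_t mc = state.range(0);
  const size_t nc = state.range(1);
  const size_t kc = state.range(2);

  std::mt19937 rng(42);
  std::uniform_int_distribution<int32_t> dist(-127, 127);
  std::vector<int8_t> a(mc * kc), b(nc * kc);
  std::vector<int32_t> c(mc * nc);
  for (int8_t& v : a) v = int8_t(dist(rng));
  for (int8_t& v : b) v = int8_t(dist(rng));

  for (auto _ : state) {
    state.PauseTiming();
    // The real harness rotates through circular buffers larger than the
    // cache and prefetches here to control cache state; this sketch only
    // clears the output tile.
    std::fill(c.begin(), c.end(), 0);
    state.ResumeTiming();
    gemm(mc, nc, kc, a.data(), b.data(), c.data());
  }

  // 2*M*N*K multiply-adds per iteration, reported as a rate, as in the
  // "OPS" counter above; the real harness additionally records "cpufreq".
  state.counters["OPS"] = benchmark::Counter(
      uint64_t(state.iterations()) * 2 * mc * nc * kc,
      benchmark::Counter::kIsRate);
}

BENCHMARK_CAPTURE(GEMMBenchmarkSketch, naive, naive_gemm)->Args({64, 64, 64});
BENCHMARK_MAIN();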
125 static void RuyBenchmark(benchmark::State& state, size_t threads) in RuyBenchmark() argument
127 const size_t mc = state.range(0); in RuyBenchmark()
128 const size_t nc = state.range(1); in RuyBenchmark()
129 const size_t kc = state.range(2); in RuyBenchmark()
184 for (auto _ : state) { in RuyBenchmark()
185 // Use circular buffers (exceeding cache size) and prefetch to control cache state: in RuyBenchmark()
190 state.PauseTiming(); in RuyBenchmark()
193 state.ResumeTiming(); in RuyBenchmark()
204 state.counters["cpufreq"] = cpu_frequency; in RuyBenchmark()
207 state.counters["OPS"] = benchmark::Counter( in RuyBenchmark()
208 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in RuyBenchmark()
211 static void ruy_st(benchmark::State& state, const char* net) in ruy_st() argument
213 RuyBenchmark(state, 1); in ruy_st()
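The RuyBenchmark matches (lines 125-208) mirror the same harness structure for the Ruy reference library, and ruy_st above simply forwards a thread count of 1. A hypothetical multi-threaded companion (the name and thread count below are illustrative, not from the file) would reuse the same forwarding pattern:

// Illustrative only: same forwarding pattern as ruy_st, different thread count.
static void ruy_mt4(benchmark::State& state, const char* net) {
  RuyBenchmark(state, /*threads=*/4);
}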
218 static void GEMMBenchmark(benchmark::State& state, in GEMMBenchmark() argument
227 const size_t nc = state.range(1); in GEMMBenchmark()
228 const size_t kc = state.range(2); in GEMMBenchmark()
232 state, in GEMMBenchmark()
238 static void jit_qs8_gemm_4x8c4__aarch32_neondot_ld64(benchmark::State& state, const char* net) { in jit_qs8_gemm_4x8c4__aarch32_neondot_ld64() argument
239 … GEMMBenchmark(state, xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, 4, 8, 4, 1, in jit_qs8_gemm_4x8c4__aarch32_neondot_ld64()
242 …static void jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64(benchmark::State& state, const char* net… in jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64() argument
243 …GEMMBenchmark(state, xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, 4, 8, 1… in jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64()
246 …static void jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char… in jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64() argument
247 …GEMMBenchmark(state, xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, 4,… in jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64()
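Lines 218-247 show a second GEMMBenchmark overload used for JIT kernels: the jit_qs8_gemm_* wrappers pass a code generator (xnn_generate_qs8_gemm_...) plus the same tile parameters, and the overload pulls nc/kc from state.range() and forwards state to the common measurement path. A hedged sketch of that shape follows, assuming a generator that hands back a callable kernel; the Generator/Kernel typedefs, the stub bodies, and the tile arguments are stand-ins, not XNNPACK's JIT API.

#include <benchmark/benchmark.h>

#include <cstddef>
#include <cstdint>

// Stand-in types: the real generators emit machine code into a code buffer;
// here a "generator" just returns a function pointer so the sketch runs.
using Kernel = void (*)(size_t mc, size_t nc, size_t kc);
using Generator = Kernel (*)(size_t mr, size_t nr, size_t kr, size_t sr);

static void noop_kernel(size_t, size_t, size_t) {}
static Kernel stub_generator(size_t, size_t, size_t, size_t) {
  return noop_kernel;
}

static void JitGEMMBenchmarkSketch(benchmark::State& state, Generator generate,
                                   size_t mr, size_t nr, size_t kr, size_t sr) {
  const size_t mc = state.range(0);
  const size_t nc = state.range(1);
  const size_t kc = state.range(2);
  // Generate the kernel once, outside the timed loop, then measure it the
  // same way as a precompiled ukernel.
  Kernel kernel = generate(mr, nr, kr, sr);
  for (auto _ : state) {
    kernel(mc, nc, kc);
  }
  state.counters["OPS"] = benchmark::Counter(
      uint64_t(state.iterations()) * 2 * mc * nc * kc,
      benchmark::Counter::kIsRate);
}

BENCHMARK_CAPTURE(JitGEMMBenchmarkSketch, stub, stub_generator, 4, 8, 4, 1)
    ->Args({64, 64, 64});
BENCHMARK_MAIN();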
256 static void qs8_gemm_4x8c4__aarch32_neondot_ld64(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
257 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, 4, 8, 4, 1, in BENCHMARK_GEMM()
260 static void qs8_gemm_4x8c4__aarch32_neondot_cortex_a55(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__aarch32_neondot_cortex_a55() argument
261 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, 4, 8, 4,… in qs8_gemm_4x8c4__aarch32_neondot_cortex_a55()
264 …static void qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53(benchmark::State& state, const char* n… in qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53() argument
265 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, 4, … in qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53()
268 …static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, const ch… in qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53() argument
269 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53… in qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53()
272 …static void qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7(benchmark::State& state, const char* ne… in qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7() argument
273 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, 4, 8… in qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7()
276 …static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7(benchmark::State& state, const cha… in qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7() argument
277 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,… in qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7()
280 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64() argument
281 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, 4, 8, 1, … in qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64()
284 …static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* ne… in qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64() argument
285 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, 4, 8… in qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64()
288 …static void qs8_gemm_1x8__aarch32_neon_mlal_lane_cortex_a7(benchmark::State& state, const char* ne… in qs8_gemm_1x8__aarch32_neon_mlal_lane_cortex_a7() argument
289 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, 1, 8… in qs8_gemm_1x8__aarch32_neon_mlal_lane_cortex_a7()
292 …static void qs8_gemm_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7(benchmark::State& state, const cha… in qs8_gemm_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7() argument
293 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,… in qs8_gemm_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7()
310 …static void qs8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
311 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, 4, 16, … in BENCHMARK_GEMM()
314 static void qs8_gemm_1x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__aarch64_neondot_ld32() argument
315 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld32, 1, 16, 4, 1, in qs8_gemm_1x16c4__aarch64_neondot_ld32()
318 static void qs8_gemm_1x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__aarch64_neondot_ld64() argument
319 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, 1, 16, 4, 1, in qs8_gemm_1x16c4__aarch64_neondot_ld64()
322 static void qs8_gemm_4x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__aarch64_neondot_ld32() argument
323 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, 4, 16, 4, 1, in qs8_gemm_4x16c4__aarch64_neondot_ld32()
326 static void qs8_gemm_4x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__aarch64_neondot_ld64() argument
327 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, 4, 16, 4, 1, in qs8_gemm_4x16c4__aarch64_neondot_ld64()
330 static void qs8_gemm_4x16c4__aarch64_neondot_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__aarch64_neondot_ld128() argument
331 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, 4, 16, 4, 1, in qs8_gemm_4x16c4__aarch64_neondot_ld128()
334 static void qs8_gemm_4x8__aarch64_neon_mlal_lane_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x8__aarch64_neon_mlal_lane_ld64() argument
335 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, 4, 8, 1, … in qs8_gemm_4x8__aarch64_neon_mlal_lane_ld64()
338 …static void qs8_gemm_4x8__aarch64_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* ne… in qs8_gemm_4x8__aarch64_neon_mlal_lane_prfm_ld64() argument
339 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, 4, 8… in qs8_gemm_4x8__aarch64_neon_mlal_lane_prfm_ld64()
342 …static void qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, const char* … in qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53() argument
343 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, 4,… in qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53()
346 …static void qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, const c… in qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53() argument
347 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a5… in qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53()
350 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x16__aarch64_neon_mlal_lane_ld64() argument
351 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, 4, 16, 1… in qs8_gemm_4x16__aarch64_neon_mlal_lane_ld64()
354 …static void qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* n… in qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64() argument
355 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, 4, … in qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64()
358 static void qs8_gemm_1x8c8__aarch64_neon_mlal_prfm(benchmark::State& state, const char* net) { in qs8_gemm_1x8c8__aarch64_neon_mlal_prfm() argument
359 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm, 1, 8, 8, 1, in qs8_gemm_1x8c8__aarch64_neon_mlal_prfm()
362 static void qs8_gemm_1x8c8__aarch64_neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_1x8c8__aarch64_neon_mlal() argument
363 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, 1, 8, 8, 1, in qs8_gemm_1x8c8__aarch64_neon_mlal()
366 …static void qs8_gemm_1x8c8__aarch64_neon_mlal_cortex_a53(benchmark::State& state, const char* net)… in qs8_gemm_1x8c8__aarch64_neon_mlal_cortex_a53() argument
367 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, 1, 8, … in qs8_gemm_1x8c8__aarch64_neon_mlal_cortex_a53()
370 …static void qs8_gemm_1x8c8__aarch64_neon_mlal_prfm_cortex_a53(benchmark::State& state, const char*… in qs8_gemm_1x8c8__aarch64_neon_mlal_prfm_cortex_a53() argument
371 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, 1… in qs8_gemm_1x8c8__aarch64_neon_mlal_prfm_cortex_a53()
374 static void qs8_gemm_2x8c8__aarch64_neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__aarch64_neon_mull() argument
375 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, 2, 8, 8, 1, in qs8_gemm_2x8c8__aarch64_neon_mull()
378 static void qs8_gemm_2x8c8__aarch64_neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__aarch64_neon_mlal() argument
379 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, 2, 8, 8, 1, in qs8_gemm_2x8c8__aarch64_neon_mlal()
382 static void qs8_gemm_2x8c8__aarch64_neon_mlal_prfm(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__aarch64_neon_mlal_prfm() argument
383 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, 2, 8, 8, 1, in qs8_gemm_2x8c8__aarch64_neon_mlal_prfm()
386 …static void qs8_gemm_2x8c8__aarch64_neon_mlal_cortex_a53(benchmark::State& state, const char* net)… in qs8_gemm_2x8c8__aarch64_neon_mlal_cortex_a53() argument
387 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, 2, 8, … in qs8_gemm_2x8c8__aarch64_neon_mlal_cortex_a53()
390 …static void qs8_gemm_2x8c8__aarch64_neon_mlal_prfm_cortex_a53(benchmark::State& state, const char*… in qs8_gemm_2x8c8__aarch64_neon_mlal_prfm_cortex_a53() argument
391 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, 2… in qs8_gemm_2x8c8__aarch64_neon_mlal_prfm_cortex_a53()
394 static void qs8_gemm_2x8c16__aarch64_neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c16__aarch64_neon_mlal() argument
395 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, 2, 8, 16, 1, in qs8_gemm_2x8c16__aarch64_neon_mlal()
425 static void qs8_gemm_1x8c4__neondot(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
426 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, 1, 8, 4, 1, in BENCHMARK_GEMM()
429 static void qs8_gemm_4x8c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__neondot() argument
430 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neondot, 4, 8, 4, 1, in qs8_gemm_4x8c4__neondot()
433 static void qs8_gemm_6x8c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_6x8c4__neondot() argument
434 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, 6, 8, 4, 1, in qs8_gemm_6x8c4__neondot()
437 static void qs8_gemm_8x8c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_8x8c4__neondot() argument
438 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, 8, 8, 4, 1, in qs8_gemm_8x8c4__neondot()
441 static void qs8_gemm_1x16c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__neondot() argument
442 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot, 1, 16, 4, 1, in qs8_gemm_1x16c4__neondot()
445 static void qs8_gemm_4x16c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__neondot() argument
446 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, 4, 16, 4, 1, in qs8_gemm_4x16c4__neondot()
449 static void qs8_gemm_6x16c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_6x16c4__neondot() argument
450 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, 6, 16, 4, 1, in qs8_gemm_6x16c4__neondot()
453 static void qs8_gemm_8x16c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_8x16c4__neondot() argument
454 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, 8, 16, 4, 1, in qs8_gemm_8x16c4__neondot()
470 static void qs8_gemm_1x8__neon_mlal_lane(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
471 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, 1, 8, 1, 1, in BENCHMARK_GEMM()
474 static void qs8_gemm_2x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_2x8__neon_mlal_lane() argument
475 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, 2, 8, 1, 1, in qs8_gemm_2x8__neon_mlal_lane()
478 static void qs8_gemm_3x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_3x8__neon_mlal_lane() argument
479 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, 3, 8, 1, 1, in qs8_gemm_3x8__neon_mlal_lane()
482 static void qs8_gemm_4x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_4x8__neon_mlal_lane() argument
483 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, 4, 8, 1, 1, in qs8_gemm_4x8__neon_mlal_lane()
486 static void qs8_gemm_6x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_6x8__neon_mlal_lane() argument
487 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, 6, 8, 1, 1, in qs8_gemm_6x8__neon_mlal_lane()
490 static void qs8_gemm_1x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_1x16__neon_mlal_lane() argument
491 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, 1, 16, 1, 1, in qs8_gemm_1x16__neon_mlal_lane()
494 static void qs8_gemm_2x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_2x16__neon_mlal_lane() argument
495 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, 2, 16, 1, 1, in qs8_gemm_2x16__neon_mlal_lane()
498 static void qs8_gemm_3x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_3x16__neon_mlal_lane() argument
499 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, 3, 16, 1, 1, in qs8_gemm_3x16__neon_mlal_lane()
502 static void qs8_gemm_4x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_4x16__neon_mlal_lane() argument
503 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, 4, 16, 1, 1, in qs8_gemm_4x16__neon_mlal_lane()
506 static void qs8_gemm_6x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_6x16__neon_mlal_lane() argument
507 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, 6, 16, 1, 1, in qs8_gemm_6x16__neon_mlal_lane()
510 static void qs8_gemm_1x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_1x8__neon_mlal_lane_prfm() argument
511 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm, 1, 8, 1, 1, in qs8_gemm_1x8__neon_mlal_lane_prfm()
514 static void qs8_gemm_2x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_2x8__neon_mlal_lane_prfm() argument
515 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane_prfm, 2, 8, 1, 1, in qs8_gemm_2x8__neon_mlal_lane_prfm()
518 static void qs8_gemm_3x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_3x8__neon_mlal_lane_prfm() argument
519 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, 3, 8, 1, 1, in qs8_gemm_3x8__neon_mlal_lane_prfm()
522 static void qs8_gemm_4x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_4x8__neon_mlal_lane_prfm() argument
523 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane_prfm, 4, 8, 1, 1, in qs8_gemm_4x8__neon_mlal_lane_prfm()
526 static void qs8_gemm_6x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_6x8__neon_mlal_lane_prfm() argument
527 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, 6, 8, 1, 1, in qs8_gemm_6x8__neon_mlal_lane_prfm()
530 static void qs8_gemm_1x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_1x16__neon_mlal_lane_prfm() argument
531 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, 1, 16, 1, 1, in qs8_gemm_1x16__neon_mlal_lane_prfm()
534 static void qs8_gemm_2x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_2x16__neon_mlal_lane_prfm() argument
535 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, 2, 16, 1, 1, in qs8_gemm_2x16__neon_mlal_lane_prfm()
538 static void qs8_gemm_3x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_3x16__neon_mlal_lane_prfm() argument
539 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, 3, 16, 1, 1, in qs8_gemm_3x16__neon_mlal_lane_prfm()
542 static void qs8_gemm_4x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_4x16__neon_mlal_lane_prfm() argument
543 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm, 4, 16, 1, 1, in qs8_gemm_4x16__neon_mlal_lane_prfm()
546 static void qs8_gemm_6x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) { in qs8_gemm_6x16__neon_mlal_lane_prfm() argument
547 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, 6, 16, 1, 1, in qs8_gemm_6x16__neon_mlal_lane_prfm()
550 static void qs8_gemm_1x8c2__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mull_dup() argument
551 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, 1, 8, 2, 1, in qs8_gemm_1x8c2__neon_mull_dup()
554 static void qs8_gemm_2x8c2__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mull_dup() argument
555 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, 2, 8, 2, 1, in qs8_gemm_2x8c2__neon_mull_dup()
558 static void qs8_gemm_3x8c2__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mull_dup() argument
559 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, 3, 8, 2, 1, in qs8_gemm_3x8c2__neon_mull_dup()
562 static void qs8_gemm_4x8c2__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mull_dup() argument
563 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, 4, 8, 2, 1, in qs8_gemm_4x8c2__neon_mull_dup()
566 static void qs8_gemm_1x16c2__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mull_dup() argument
567 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, 1, 16, 2, 1, in qs8_gemm_1x16c2__neon_mull_dup()
570 static void qs8_gemm_2x16c2__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mull_dup() argument
571 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, 2, 16, 2, 1, in qs8_gemm_2x16c2__neon_mull_dup()
574 static void qs8_gemm_3x16c2__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mull_dup() argument
575 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, 3, 16, 2, 1, in qs8_gemm_3x16c2__neon_mull_dup()
578 static void qs8_gemm_4x16c2__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mull_dup() argument
579 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, 4, 16, 2, 1, in qs8_gemm_4x16c2__neon_mull_dup()
582 static void qs8_gemm_1x8c2__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mlal_dup() argument
583 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, 1, 8, 2, 1, in qs8_gemm_1x8c2__neon_mlal_dup()
586 static void qs8_gemm_2x8c2__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mlal_dup() argument
587 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, 2, 8, 2, 1, in qs8_gemm_2x8c2__neon_mlal_dup()
590 static void qs8_gemm_3x8c2__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mlal_dup() argument
591 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, 3, 8, 2, 1, in qs8_gemm_3x8c2__neon_mlal_dup()
594 static void qs8_gemm_4x8c2__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mlal_dup() argument
595 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, 4, 8, 2, 1, in qs8_gemm_4x8c2__neon_mlal_dup()
598 static void qs8_gemm_1x16c2__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mlal_dup() argument
599 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, 1, 16, 2, 1, in qs8_gemm_1x16c2__neon_mlal_dup()
602 static void qs8_gemm_2x16c2__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mlal_dup() argument
603 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, 2, 16, 2, 1, in qs8_gemm_2x16c2__neon_mlal_dup()
606 static void qs8_gemm_3x16c2__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mlal_dup() argument
607 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, 3, 16, 2, 1, in qs8_gemm_3x16c2__neon_mlal_dup()
610 static void qs8_gemm_4x16c2__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mlal_dup() argument
611 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, 4, 16, 2, 1, in qs8_gemm_4x16c2__neon_mlal_dup()
614 static void qs8_gemm_1x8c2__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mull_ld1r() argument
615 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, 1, 8, 2, 1, in qs8_gemm_1x8c2__neon_mull_ld1r()
618 static void qs8_gemm_2x8c2__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mull_ld1r() argument
619 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, 2, 8, 2, 1, in qs8_gemm_2x8c2__neon_mull_ld1r()
622 static void qs8_gemm_3x8c2__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mull_ld1r() argument
623 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, 3, 8, 2, 1, in qs8_gemm_3x8c2__neon_mull_ld1r()
626 static void qs8_gemm_4x8c2__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mull_ld1r() argument
627 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, 4, 8, 2, 1, in qs8_gemm_4x8c2__neon_mull_ld1r()
630 static void qs8_gemm_1x16c2__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mull_ld1r() argument
631 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, 1, 16, 2, 1, in qs8_gemm_1x16c2__neon_mull_ld1r()
634 static void qs8_gemm_2x16c2__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mull_ld1r() argument
635 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, 2, 16, 2, 1, in qs8_gemm_2x16c2__neon_mull_ld1r()
638 static void qs8_gemm_3x16c2__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mull_ld1r() argument
639 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, 3, 16, 2, 1, in qs8_gemm_3x16c2__neon_mull_ld1r()
642 static void qs8_gemm_4x16c2__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mull_ld1r() argument
643 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, 4, 16, 2, 1, in qs8_gemm_4x16c2__neon_mull_ld1r()
646 static void qs8_gemm_1x8c2__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mlal_ld1r() argument
647 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, 1, 8, 2, 1, in qs8_gemm_1x8c2__neon_mlal_ld1r()
650 static void qs8_gemm_2x8c2__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mlal_ld1r() argument
651 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, 2, 8, 2, 1, in qs8_gemm_2x8c2__neon_mlal_ld1r()
654 static void qs8_gemm_3x8c2__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mlal_ld1r() argument
655 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, 3, 8, 2, 1, in qs8_gemm_3x8c2__neon_mlal_ld1r()
658 static void qs8_gemm_4x8c2__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mlal_ld1r() argument
659 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, 4, 8, 2, 1, in qs8_gemm_4x8c2__neon_mlal_ld1r()
662 static void qs8_gemm_1x16c2__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mlal_ld1r() argument
663 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, 1, 16, 2, 1, in qs8_gemm_1x16c2__neon_mlal_ld1r()
666 static void qs8_gemm_2x16c2__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mlal_ld1r() argument
667 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r, 2, 16, 2, 1, in qs8_gemm_2x16c2__neon_mlal_ld1r()
670 static void qs8_gemm_3x16c2__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mlal_ld1r() argument
671 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, 3, 16, 2, 1, in qs8_gemm_3x16c2__neon_mlal_ld1r()
674 static void qs8_gemm_4x16c2__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mlal_ld1r() argument
675 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, 4, 16, 2, 1, in qs8_gemm_4x16c2__neon_mlal_ld1r()
678 static void qs8_gemm_1x8c2__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mull_ld2r() argument
679 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, 1, 8, 2, 1, in qs8_gemm_1x8c2__neon_mull_ld2r()
682 static void qs8_gemm_2x8c2__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mull_ld2r() argument
683 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, 2, 8, 2, 1, in qs8_gemm_2x8c2__neon_mull_ld2r()
686 static void qs8_gemm_3x8c2__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mull_ld2r() argument
687 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, 3, 8, 2, 1, in qs8_gemm_3x8c2__neon_mull_ld2r()
690 static void qs8_gemm_4x8c2__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mull_ld2r() argument
691 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, 4, 8, 2, 1, in qs8_gemm_4x8c2__neon_mull_ld2r()
694 static void qs8_gemm_1x16c2__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mull_ld2r() argument
695 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, 1, 16, 2, 1, in qs8_gemm_1x16c2__neon_mull_ld2r()
698 static void qs8_gemm_2x16c2__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mull_ld2r() argument
699 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, 2, 16, 2, 1, in qs8_gemm_2x16c2__neon_mull_ld2r()
702 static void qs8_gemm_3x16c2__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mull_ld2r() argument
703 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, 3, 16, 2, 1, in qs8_gemm_3x16c2__neon_mull_ld2r()
706 static void qs8_gemm_4x16c2__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mull_ld2r() argument
707 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, 4, 16, 2, 1, in qs8_gemm_4x16c2__neon_mull_ld2r()
710 static void qs8_gemm_1x8c2__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mlal_ld2r() argument
711 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, 1, 8, 2, 1, in qs8_gemm_1x8c2__neon_mlal_ld2r()
714 static void qs8_gemm_2x8c2__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mlal_ld2r() argument
715 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, 2, 8, 2, 1, in qs8_gemm_2x8c2__neon_mlal_ld2r()
718 static void qs8_gemm_3x8c2__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mlal_ld2r() argument
719 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r, 3, 8, 2, 1, in qs8_gemm_3x8c2__neon_mlal_ld2r()
722 static void qs8_gemm_4x8c2__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mlal_ld2r() argument
723 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, 4, 8, 2, 1, in qs8_gemm_4x8c2__neon_mlal_ld2r()
726 static void qs8_gemm_1x16c2__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mlal_ld2r() argument
727 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, 1, 16, 2, 1, in qs8_gemm_1x16c2__neon_mlal_ld2r()
730 static void qs8_gemm_2x16c2__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mlal_ld2r() argument
731 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, 2, 16, 2, 1, in qs8_gemm_2x16c2__neon_mlal_ld2r()
734 static void qs8_gemm_3x16c2__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mlal_ld2r() argument
735 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, 3, 16, 2, 1, in qs8_gemm_3x16c2__neon_mlal_ld2r()
738 static void qs8_gemm_4x16c2__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mlal_ld2r() argument
739 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, 4, 16, 2, 1, in qs8_gemm_4x16c2__neon_mlal_ld2r()
742 static void qs8_gemm_1x8c2__neon_mull_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mull_ld4r() argument
743 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, 1, 8, 2, 1, in qs8_gemm_1x8c2__neon_mull_ld4r()
746 static void qs8_gemm_2x8c2__neon_mull_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mull_ld4r() argument
747 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, 2, 8, 2, 1, in qs8_gemm_2x8c2__neon_mull_ld4r()
750 static void qs8_gemm_3x8c2__neon_mull_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mull_ld4r() argument
751 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, 3, 8, 2, 1, in qs8_gemm_3x8c2__neon_mull_ld4r()
754 static void qs8_gemm_4x8c2__neon_mull_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mull_ld4r() argument
755 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, 4, 8, 2, 1, in qs8_gemm_4x8c2__neon_mull_ld4r()
758 static void qs8_gemm_1x16c2__neon_mull_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mull_ld4r() argument
759 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, 1, 16, 2, 1, in qs8_gemm_1x16c2__neon_mull_ld4r()
762 static void qs8_gemm_2x16c2__neon_mull_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mull_ld4r() argument
763 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, 2, 16, 2, 1, in qs8_gemm_2x16c2__neon_mull_ld4r()
766 static void qs8_gemm_3x16c2__neon_mull_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mull_ld4r() argument
767 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, 3, 16, 2, 1, in qs8_gemm_3x16c2__neon_mull_ld4r()
770 static void qs8_gemm_4x16c2__neon_mull_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mull_ld4r() argument
771 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, 4, 16, 2, 1, in qs8_gemm_4x16c2__neon_mull_ld4r()
774 static void qs8_gemm_1x8c2__neon_mlal_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mlal_ld4r() argument
775 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, 1, 8, 2, 1, in qs8_gemm_1x8c2__neon_mlal_ld4r()
778 static void qs8_gemm_2x8c2__neon_mlal_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mlal_ld4r() argument
779 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r, 2, 8, 2, 1, in qs8_gemm_2x8c2__neon_mlal_ld4r()
782 static void qs8_gemm_3x8c2__neon_mlal_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mlal_ld4r() argument
783 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, 3, 8, 2, 1, in qs8_gemm_3x8c2__neon_mlal_ld4r()
786 static void qs8_gemm_4x8c2__neon_mlal_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mlal_ld4r() argument
787 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, 4, 8, 2, 1, in qs8_gemm_4x8c2__neon_mlal_ld4r()
790 static void qs8_gemm_1x16c2__neon_mlal_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mlal_ld4r() argument
791 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, 1, 16, 2, 1, in qs8_gemm_1x16c2__neon_mlal_ld4r()
794 static void qs8_gemm_2x16c2__neon_mlal_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mlal_ld4r() argument
795 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, 2, 16, 2, 1, in qs8_gemm_2x16c2__neon_mlal_ld4r()
798 static void qs8_gemm_3x16c2__neon_mlal_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mlal_ld4r() argument
799 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, 3, 16, 2, 1, in qs8_gemm_3x16c2__neon_mlal_ld4r()
802 static void qs8_gemm_4x16c2__neon_mlal_ld4r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mlal_ld4r() argument
803 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, 4, 16, 2, 1, in qs8_gemm_4x16c2__neon_mlal_ld4r()
806 static void qs8_gemm_1x8c2s4__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2s4__neon_mull() argument
807 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, 1, 8, 2, 4, in qs8_gemm_1x8c2s4__neon_mull()
810 static void qs8_gemm_2x8c2s4__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2s4__neon_mull() argument
811 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, 2, 8, 2, 4, in qs8_gemm_2x8c2s4__neon_mull()
814 static void qs8_gemm_3x8c2s4__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2s4__neon_mull() argument
815 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, 3, 8, 2, 4, in qs8_gemm_3x8c2s4__neon_mull()
818 static void qs8_gemm_4x8c2s4__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2s4__neon_mull() argument
819 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, 4, 8, 2, 4, in qs8_gemm_4x8c2s4__neon_mull()
822 static void qs8_gemm_1x16c2s4__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2s4__neon_mull() argument
823 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, 1, 16, 2, 4, in qs8_gemm_1x16c2s4__neon_mull()
826 static void qs8_gemm_2x16c2s4__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2s4__neon_mull() argument
827 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, 2, 16, 2, 4, in qs8_gemm_2x16c2s4__neon_mull()
830 static void qs8_gemm_3x16c2s4__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2s4__neon_mull() argument
831 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, 3, 16, 2, 4, in qs8_gemm_3x16c2s4__neon_mull()
834 static void qs8_gemm_4x16c2s4__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2s4__neon_mull() argument
835 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, 4, 16, 2, 4, in qs8_gemm_4x16c2s4__neon_mull()
838 static void qs8_gemm_1x8c2s4__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2s4__neon_mlal() argument
839 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, 1, 8, 2, 4, in qs8_gemm_1x8c2s4__neon_mlal()
842 static void qs8_gemm_2x8c2s4__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2s4__neon_mlal() argument
843 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal, 2, 8, 2, 4, in qs8_gemm_2x8c2s4__neon_mlal()
846 static void qs8_gemm_3x8c2s4__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2s4__neon_mlal() argument
847 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, 3, 8, 2, 4, in qs8_gemm_3x8c2s4__neon_mlal()
850 static void qs8_gemm_4x8c2s4__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2s4__neon_mlal() argument
851 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, 4, 8, 2, 4, in qs8_gemm_4x8c2s4__neon_mlal()
854 static void qs8_gemm_1x16c2s4__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2s4__neon_mlal() argument
855 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, 1, 16, 2, 4, in qs8_gemm_1x16c2s4__neon_mlal()
858 static void qs8_gemm_2x16c2s4__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2s4__neon_mlal() argument
859 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, 2, 16, 2, 4, in qs8_gemm_2x16c2s4__neon_mlal()
862 static void qs8_gemm_3x16c2s4__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2s4__neon_mlal() argument
863 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, 3, 16, 2, 4, in qs8_gemm_3x16c2s4__neon_mlal()
866 static void qs8_gemm_4x16c2s4__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2s4__neon_mlal() argument
867 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, 4, 16, 2, 4, in qs8_gemm_4x16c2s4__neon_mlal()
870 static void qs8_gemm_1x8c4__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x8c4__neon_mull_dup() argument
871 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, 1, 8, 4, 1, in qs8_gemm_1x8c4__neon_mull_dup()
874 static void qs8_gemm_2x8c4__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x8c4__neon_mull_dup() argument
875 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, 2, 8, 4, 1, in qs8_gemm_2x8c4__neon_mull_dup()
878 static void qs8_gemm_3x8c4__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x8c4__neon_mull_dup() argument
879 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, 3, 8, 4, 1, in qs8_gemm_3x8c4__neon_mull_dup()
882 static void qs8_gemm_4x8c4__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__neon_mull_dup() argument
883 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, 4, 8, 4, 1, in qs8_gemm_4x8c4__neon_mull_dup()
886 static void qs8_gemm_1x16c4__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__neon_mull_dup() argument
887 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, 1, 16, 4, 1, in qs8_gemm_1x16c4__neon_mull_dup()
890 static void qs8_gemm_2x16c4__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x16c4__neon_mull_dup() argument
891 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, 2, 16, 4, 1, in qs8_gemm_2x16c4__neon_mull_dup()
894 static void qs8_gemm_3x16c4__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x16c4__neon_mull_dup() argument
895 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, 3, 16, 4, 1, in qs8_gemm_3x16c4__neon_mull_dup()
898 static void qs8_gemm_4x16c4__neon_mull_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__neon_mull_dup() argument
899 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, 4, 16, 4, 1, in qs8_gemm_4x16c4__neon_mull_dup()
902 static void qs8_gemm_1x8c4__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x8c4__neon_mlal_dup() argument
903 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, 1, 8, 4, 1, in qs8_gemm_1x8c4__neon_mlal_dup()
906 static void qs8_gemm_2x8c4__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x8c4__neon_mlal_dup() argument
907 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, 2, 8, 4, 1, in qs8_gemm_2x8c4__neon_mlal_dup()
910 static void qs8_gemm_3x8c4__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x8c4__neon_mlal_dup() argument
911 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, 3, 8, 4, 1, in qs8_gemm_3x8c4__neon_mlal_dup()
914 static void qs8_gemm_4x8c4__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__neon_mlal_dup() argument
915 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, 4, 8, 4, 1, in qs8_gemm_4x8c4__neon_mlal_dup()
918 static void qs8_gemm_1x16c4__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__neon_mlal_dup() argument
919 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, 1, 16, 4, 1, in qs8_gemm_1x16c4__neon_mlal_dup()
922 static void qs8_gemm_2x16c4__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x16c4__neon_mlal_dup() argument
923 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, 2, 16, 4, 1, in qs8_gemm_2x16c4__neon_mlal_dup()
926 static void qs8_gemm_3x16c4__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x16c4__neon_mlal_dup() argument
927 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, 3, 16, 4, 1, in qs8_gemm_3x16c4__neon_mlal_dup()
930 static void qs8_gemm_4x16c4__neon_mlal_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__neon_mlal_dup() argument
931 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, 4, 16, 4, 1, in qs8_gemm_4x16c4__neon_mlal_dup()
934 static void qs8_gemm_1x8c4__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c4__neon_mull_ld1r() argument
935 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, 1, 8, 4, 1, in qs8_gemm_1x8c4__neon_mull_ld1r()
938 static void qs8_gemm_2x8c4__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c4__neon_mull_ld1r() argument
939 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, 2, 8, 4, 1, in qs8_gemm_2x8c4__neon_mull_ld1r()
942 static void qs8_gemm_3x8c4__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c4__neon_mull_ld1r() argument
943 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, 3, 8, 4, 1, in qs8_gemm_3x8c4__neon_mull_ld1r()
946 static void qs8_gemm_4x8c4__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__neon_mull_ld1r() argument
947 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, 4, 8, 4, 1, in qs8_gemm_4x8c4__neon_mull_ld1r()
950 static void qs8_gemm_1x16c4__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__neon_mull_ld1r() argument
951 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, 1, 16, 4, 1, in qs8_gemm_1x16c4__neon_mull_ld1r()
954 static void qs8_gemm_2x16c4__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c4__neon_mull_ld1r() argument
955 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r, 2, 16, 4, 1, in qs8_gemm_2x16c4__neon_mull_ld1r()
958 static void qs8_gemm_3x16c4__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c4__neon_mull_ld1r() argument
959 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, 3, 16, 4, 1, in qs8_gemm_3x16c4__neon_mull_ld1r()
962 static void qs8_gemm_4x16c4__neon_mull_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__neon_mull_ld1r() argument
963 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, 4, 16, 4, 1, in qs8_gemm_4x16c4__neon_mull_ld1r()
966 static void qs8_gemm_1x8c4__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c4__neon_mlal_ld1r() argument
967 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, 1, 8, 4, 1, in qs8_gemm_1x8c4__neon_mlal_ld1r()
970 static void qs8_gemm_2x8c4__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c4__neon_mlal_ld1r() argument
971 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, 2, 8, 4, 1, in qs8_gemm_2x8c4__neon_mlal_ld1r()
974 static void qs8_gemm_3x8c4__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c4__neon_mlal_ld1r() argument
975 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, 3, 8, 4, 1, in qs8_gemm_3x8c4__neon_mlal_ld1r()
978 static void qs8_gemm_4x8c4__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__neon_mlal_ld1r() argument
979 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, 4, 8, 4, 1, in qs8_gemm_4x8c4__neon_mlal_ld1r()
982 static void qs8_gemm_1x16c4__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__neon_mlal_ld1r() argument
983 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, 1, 16, 4, 1, in qs8_gemm_1x16c4__neon_mlal_ld1r()
986 static void qs8_gemm_2x16c4__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c4__neon_mlal_ld1r() argument
987 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r, 2, 16, 4, 1, in qs8_gemm_2x16c4__neon_mlal_ld1r()
990 static void qs8_gemm_3x16c4__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c4__neon_mlal_ld1r() argument
991 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, 3, 16, 4, 1, in qs8_gemm_3x16c4__neon_mlal_ld1r()
994 static void qs8_gemm_4x16c4__neon_mlal_ld1r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__neon_mlal_ld1r() argument
995 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, 4, 16, 4, 1, in qs8_gemm_4x16c4__neon_mlal_ld1r()
998 static void qs8_gemm_1x8c4__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c4__neon_mull_ld2r() argument
999 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, 1, 8, 4, 1, in qs8_gemm_1x8c4__neon_mull_ld2r()
1002 static void qs8_gemm_2x8c4__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c4__neon_mull_ld2r() argument
1003 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, 2, 8, 4, 1, in qs8_gemm_2x8c4__neon_mull_ld2r()
1006 static void qs8_gemm_3x8c4__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c4__neon_mull_ld2r() argument
1007 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, 3, 8, 4, 1, in qs8_gemm_3x8c4__neon_mull_ld2r()
1010 static void qs8_gemm_4x8c4__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__neon_mull_ld2r() argument
1011 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, 4, 8, 4, 1, in qs8_gemm_4x8c4__neon_mull_ld2r()
1014 static void qs8_gemm_1x16c4__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__neon_mull_ld2r() argument
1015 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, 1, 16, 4, 1, in qs8_gemm_1x16c4__neon_mull_ld2r()
1018 static void qs8_gemm_2x16c4__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c4__neon_mull_ld2r() argument
1019 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, 2, 16, 4, 1, in qs8_gemm_2x16c4__neon_mull_ld2r()
1022 static void qs8_gemm_3x16c4__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c4__neon_mull_ld2r() argument
1023 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, 3, 16, 4, 1, in qs8_gemm_3x16c4__neon_mull_ld2r()
1026 static void qs8_gemm_4x16c4__neon_mull_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__neon_mull_ld2r() argument
1027 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, 4, 16, 4, 1, in qs8_gemm_4x16c4__neon_mull_ld2r()
1030 static void qs8_gemm_1x8c4__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_1x8c4__neon_mlal_ld2r() argument
1031 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, 1, 8, 4, 1, in qs8_gemm_1x8c4__neon_mlal_ld2r()
1034 static void qs8_gemm_2x8c4__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_2x8c4__neon_mlal_ld2r() argument
1035 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, 2, 8, 4, 1, in qs8_gemm_2x8c4__neon_mlal_ld2r()
1038 static void qs8_gemm_3x8c4__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_3x8c4__neon_mlal_ld2r() argument
1039 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, 3, 8, 4, 1, in qs8_gemm_3x8c4__neon_mlal_ld2r()
1042 static void qs8_gemm_4x8c4__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__neon_mlal_ld2r() argument
1043 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, 4, 8, 4, 1, in qs8_gemm_4x8c4__neon_mlal_ld2r()
1046 static void qs8_gemm_1x16c4__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__neon_mlal_ld2r() argument
1047 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, 1, 16, 4, 1, in qs8_gemm_1x16c4__neon_mlal_ld2r()
1050 static void qs8_gemm_2x16c4__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_2x16c4__neon_mlal_ld2r() argument
1051 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, 2, 16, 4, 1, in qs8_gemm_2x16c4__neon_mlal_ld2r()
1054 static void qs8_gemm_3x16c4__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_3x16c4__neon_mlal_ld2r() argument
1055 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, 3, 16, 4, 1, in qs8_gemm_3x16c4__neon_mlal_ld2r()
1058 static void qs8_gemm_4x16c4__neon_mlal_ld2r(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__neon_mlal_ld2r() argument
1059 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, 4, 16, 4, 1, in qs8_gemm_4x16c4__neon_mlal_ld2r()
1062 static void qs8_gemm_1x8c8__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_1x8c8__neon_mull() argument
1063 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mull, 1, 8, 8, 1, in qs8_gemm_1x8c8__neon_mull()
1066 static void qs8_gemm_2x8c8__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__neon_mull() argument
1067 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, 2, 8, 8, 1, in qs8_gemm_2x8c8__neon_mull()
1070 static void qs8_gemm_3x8c8__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_3x8c8__neon_mull() argument
1071 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, 3, 8, 8, 1, in qs8_gemm_3x8c8__neon_mull()
1074 static void qs8_gemm_4x8c8__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_4x8c8__neon_mull() argument
1075 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, 4, 8, 8, 1, in qs8_gemm_4x8c8__neon_mull()
1078 static void qs8_gemm_1x16c8__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_1x16c8__neon_mull() argument
1079 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, 1, 16, 8, 1, in qs8_gemm_1x16c8__neon_mull()
1082 static void qs8_gemm_2x16c8__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_2x16c8__neon_mull() argument
1083 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull, 2, 16, 8, 1, in qs8_gemm_2x16c8__neon_mull()
1086 static void qs8_gemm_3x16c8__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_3x16c8__neon_mull() argument
1087 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, 3, 16, 8, 1, in qs8_gemm_3x16c8__neon_mull()
1090 static void qs8_gemm_4x16c8__neon_mull(benchmark::State& state, const char* net) { in qs8_gemm_4x16c8__neon_mull() argument
1091 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull, 4, 16, 8, 1, in qs8_gemm_4x16c8__neon_mull()
1094 static void qs8_gemm_1x8c8__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_1x8c8__neon_mlal() argument
1095 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, 1, 8, 8, 1, in qs8_gemm_1x8c8__neon_mlal()
1098 static void qs8_gemm_2x8c8__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__neon_mlal() argument
1099 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, 2, 8, 8, 1, in qs8_gemm_2x8c8__neon_mlal()
1102 static void qs8_gemm_3x8c8__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_3x8c8__neon_mlal() argument
1103 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, 3, 8, 8, 1, in qs8_gemm_3x8c8__neon_mlal()
1106 static void qs8_gemm_4x8c8__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_4x8c8__neon_mlal() argument
1107 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, 4, 8, 8, 1, in qs8_gemm_4x8c8__neon_mlal()
1110 static void qs8_gemm_1x16c8__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_1x16c8__neon_mlal() argument
1111 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal, 1, 16, 8, 1, in qs8_gemm_1x16c8__neon_mlal()
1114 static void qs8_gemm_2x16c8__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_2x16c8__neon_mlal() argument
1115 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, 2, 16, 8, 1, in qs8_gemm_2x16c8__neon_mlal()
1118 static void qs8_gemm_3x16c8__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_3x16c8__neon_mlal() argument
1119 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, 3, 16, 8, 1, in qs8_gemm_3x16c8__neon_mlal()
1122 static void qs8_gemm_4x16c8__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_4x16c8__neon_mlal() argument
1123 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, 4, 16, 8, 1, in qs8_gemm_4x16c8__neon_mlal()
1126 static void qs8_gemm_1x8c16__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_1x8c16__neon_mlal() argument
1127 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, 1, 8, 16, 1, in qs8_gemm_1x8c16__neon_mlal()
1130 static void qs8_gemm_2x8c16__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c16__neon_mlal() argument
1131 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, 2, 8, 16, 1, in qs8_gemm_2x8c16__neon_mlal()
1134 static void qs8_gemm_3x8c16__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_3x8c16__neon_mlal() argument
1135 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, 3, 8, 16, 1, in qs8_gemm_3x8c16__neon_mlal()
1138 static void qs8_gemm_4x8c16__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_4x8c16__neon_mlal() argument
1139 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, 4, 8, 16, 1, in qs8_gemm_4x8c16__neon_mlal()
1142 static void qs8_gemm_1x16c16__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_1x16c16__neon_mlal() argument
1143 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, 1, 16, 16, 1, in qs8_gemm_1x16c16__neon_mlal()
1146 static void qs8_gemm_2x16c16__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_2x16c16__neon_mlal() argument
1147 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, 2, 16, 16, 1, in qs8_gemm_2x16c16__neon_mlal()
1150 static void qs8_gemm_3x16c16__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_3x16c16__neon_mlal() argument
1151 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, 3, 16, 16, 1, in qs8_gemm_3x16c16__neon_mlal()
1154 static void qs8_gemm_4x16c16__neon_mlal(benchmark::State& state, const char* net) { in qs8_gemm_4x16c16__neon_mlal() argument
1155 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, 4, 16, 16, 1, in qs8_gemm_4x16c16__neon_mlal()
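The hits above all share the same call shape: GEMMBenchmark(state, ukernel, mr, nr, kr, sr, ...), where the four integers mirror the tile suffix in the kernel name (for example _4x16c8 means mr=4, nr=16, kr=8, and an s4 suffix raises sr to 4). The trailing ISA-check argument is cut off in every hit; for the NEON kernels it is presumably benchmark::utils::CheckNEON. A minimal sketch of one complete entry point, reconstructed under that assumption:

    static void qs8_gemm_4x16c8__neon_mlal(benchmark::State& state, const char* net) {
      // mr/nr/kr/sr follow the _4x16c8 suffix of the microkernel (sr defaults to 1).
      GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal,
                    /*mr=*/4, /*nr=*/16, /*kr=*/8, /*sr=*/1,
                    benchmark::utils::CheckNEON);  // assumed ISA check; truncated in the listing
    }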
1335 static void qs8_gemm_1x1c4__armsimd32(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
1336 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32, 1, 1, 4, 1, in BENCHMARK_GEMM()
1339 static void qs8_gemm_2x1c4__armsimd32(benchmark::State& state, const char* net) { in qs8_gemm_2x1c4__armsimd32() argument
1340 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32, 2, 1, 4, 1, in qs8_gemm_2x1c4__armsimd32()
1343 static void qs8_gemm_1x2c4__armsimd32(benchmark::State& state, const char* net) { in qs8_gemm_1x2c4__armsimd32() argument
1344 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32, 1, 2, 4, 1, in qs8_gemm_1x2c4__armsimd32()
1347 static void qs8_gemm_2x2c4__armsimd32(benchmark::State& state, const char* net) { in qs8_gemm_2x2c4__armsimd32() argument
1348 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, 2, 2, 4, 1, in qs8_gemm_2x2c4__armsimd32()
1360 static void qs8_gemm_2x16c8__avx512skx(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
1361 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, 2, 16, 8, 1, in BENCHMARK_GEMM()
1364 static void qs8_gemm_3x16c8__avx512skx(benchmark::State& state, const char* net) { in qs8_gemm_3x16c8__avx512skx() argument
1365 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, 3, 16, 8, 1, in qs8_gemm_3x16c8__avx512skx()
1368 static void qs8_gemm_4x16c8__avx512skx(benchmark::State& state, const char* net) { in qs8_gemm_4x16c8__avx512skx() argument
1369 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, 4, 16, 8, 1, in qs8_gemm_4x16c8__avx512skx()
1373 static void qs8_gemm_2x8c8__avx2(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__avx2() argument
1374 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, 2, 8, 8, 1, in qs8_gemm_2x8c8__avx2()
1377 static void qs8_gemm_3x8c8__avx2(benchmark::State& state, const char* net) { in qs8_gemm_3x8c8__avx2() argument
1378 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, 3, 8, 8, 1, in qs8_gemm_3x8c8__avx2()
1382 static void qs8_gemm_xw_2x8c8__avx2(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x8c8__avx2() argument
1383 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, 2, 8, 8, 1, in qs8_gemm_xw_2x8c8__avx2()
1386 static void qs8_gemm_xw_3x8c8__avx2(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x8c8__avx2() argument
1387 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, 3, 8, 8, 1, in qs8_gemm_xw_3x8c8__avx2()
1391 static void qs8_gemm_2x4c2__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__xop_ld64() argument
1392 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, 2, 4, 2, 1, in qs8_gemm_2x4c2__xop_ld64()
1395 static void qs8_gemm_3x4c2__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__xop_ld64() argument
1396 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, 3, 4, 2, 1, in qs8_gemm_3x4c2__xop_ld64()
1399 static void qs8_gemm_4x4c2__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__xop_ld64() argument
1400 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, 4, 4, 2, 1, in qs8_gemm_4x4c2__xop_ld64()
1404 static void qs8_gemm_2x4c2__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__xop_ld128() argument
1405 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, 2, 4, 2, 1, in qs8_gemm_2x4c2__xop_ld128()
1408 static void qs8_gemm_3x4c2__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__xop_ld128() argument
1409 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, 3, 4, 2, 1, in qs8_gemm_3x4c2__xop_ld128()
1412 static void qs8_gemm_4x4c2__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__xop_ld128() argument
1413 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, 4, 4, 2, 1, in qs8_gemm_4x4c2__xop_ld128()
1417 static void qs8_gemm_xw_2x4c2__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2__xop() argument
1418 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__xop, 2, 4, 2, 1, in qs8_gemm_xw_2x4c2__xop()
1421 static void qs8_gemm_xw_3x4c2__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2__xop() argument
1422 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__xop, 3, 4, 2, 1, in qs8_gemm_xw_3x4c2__xop()
1425 static void qs8_gemm_xw_4x4c2__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__xop() argument
1426 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, 4, 4, 2, 1, in qs8_gemm_xw_4x4c2__xop()
1430 static void qs8_gemm_2x4c2s4__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__xop_ld64() argument
1431 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__xop_ld64()
1434 static void qs8_gemm_3x4c2s4__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__xop_ld64() argument
1435 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__xop_ld64()
1438 static void qs8_gemm_4x4c2s4__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__xop_ld64() argument
1439 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__xop_ld64()
1443 static void qs8_gemm_2x4c2s4__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__xop_ld128() argument
1444 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__xop_ld128()
1447 static void qs8_gemm_3x4c2s4__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__xop_ld128() argument
1448 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__xop_ld128()
1451 static void qs8_gemm_4x4c2s4__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__xop_ld128() argument
1452 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__xop_ld128()
1456 static void qs8_gemm_xw_2x4c2s4__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2s4__xop() argument
1457 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2s4__xop, 2, 4, 2, 4, in qs8_gemm_xw_2x4c2s4__xop()
1460 static void qs8_gemm_xw_3x4c2s4__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2s4__xop() argument
1461 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__xop, 3, 4, 2, 4, in qs8_gemm_xw_3x4c2s4__xop()
1464 static void qs8_gemm_xw_4x4c2s4__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2s4__xop() argument
1465 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2s4__xop, 4, 4, 2, 4, in qs8_gemm_xw_4x4c2s4__xop()
1469 static void qs8_gemm_2x4c8__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__xop_ld64() argument
1470 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, 2, 4, 8, 1, in qs8_gemm_2x4c8__xop_ld64()
1473 static void qs8_gemm_3x4c8__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__xop_ld64() argument
1474 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, 3, 4, 8, 1, in qs8_gemm_3x4c8__xop_ld64()
1478 static void qs8_gemm_2x4c8__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__xop_ld128() argument
1479 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, 2, 4, 8, 1, in qs8_gemm_2x4c8__xop_ld128()
1482 static void qs8_gemm_3x4c8__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__xop_ld128() argument
1483 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, 3, 4, 8, 1, in qs8_gemm_3x4c8__xop_ld128()
1487 static void qs8_gemm_xw_2x4c8__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__xop() argument
1488 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__xop, 2, 4, 8, 1, in qs8_gemm_xw_2x4c8__xop()
1491 static void qs8_gemm_xw_3x4c8__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__xop() argument
1492 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__xop, 3, 4, 8, 1, in qs8_gemm_xw_3x4c8__xop()
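The _ld64/_ld128 suffixes that recur through the x86 hits distinguish how wide a chunk of the packed B matrix the inner loop loads per step; they do not change mr/nr/kr/sr. An illustration of the convention only, not the microkernel source:

    #include <emmintrin.h>
    // Convention only: an _ld64 kernel reads 8 packed int8 weights of B per step,
    // an _ld128 kernel reads 16; the mr/nr/kr/sr tile parameters are unchanged.
    static inline __m128i load_b_ld64(const void* w)  { return _mm_loadl_epi64((const __m128i*) w); }
    static inline __m128i load_b_ld128(const void* w) { return _mm_loadu_si128((const __m128i*) w); }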
1496 static void qs8_gemm_2x4c2__avx_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__avx_ld64() argument
1497 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, 2, 4, 2, 1, in qs8_gemm_2x4c2__avx_ld64()
1500 static void qs8_gemm_3x4c2__avx_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__avx_ld64() argument
1501 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, 3, 4, 2, 1, in qs8_gemm_3x4c2__avx_ld64()
1504 static void qs8_gemm_4x4c2__avx_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__avx_ld64() argument
1505 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, 4, 4, 2, 1, in qs8_gemm_4x4c2__avx_ld64()
1509 static void qs8_gemm_2x4c2__avx_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__avx_ld128() argument
1510 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, 2, 4, 2, 1, in qs8_gemm_2x4c2__avx_ld128()
1513 static void qs8_gemm_3x4c2__avx_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__avx_ld128() argument
1514 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, 3, 4, 2, 1, in qs8_gemm_3x4c2__avx_ld128()
1517 static void qs8_gemm_4x4c2__avx_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__avx_ld128() argument
1518 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, 4, 4, 2, 1, in qs8_gemm_4x4c2__avx_ld128()
1522 static void qs8_gemm_xw_2x4c2__avx(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2__avx() argument
1523 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, 2, 4, 2, 1, in qs8_gemm_xw_2x4c2__avx()
1526 static void qs8_gemm_xw_3x4c2__avx(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2__avx() argument
1527 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, 3, 4, 2, 1, in qs8_gemm_xw_3x4c2__avx()
1530 static void qs8_gemm_xw_4x4c2__avx(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__avx() argument
1531 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__avx, 4, 4, 2, 1, in qs8_gemm_xw_4x4c2__avx()
1535 static void qs8_gemm_2x4c2s4__avx_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__avx_ld64() argument
1536 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__avx_ld64()
1539 static void qs8_gemm_3x4c2s4__avx_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__avx_ld64() argument
1540 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__avx_ld64()
1543 static void qs8_gemm_4x4c2s4__avx_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__avx_ld64() argument
1544 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__avx_ld64()
1548 static void qs8_gemm_2x4c2s4__avx_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__avx_ld128() argument
1549 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__avx_ld128()
1552 static void qs8_gemm_3x4c2s4__avx_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__avx_ld128() argument
1553 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__avx_ld128()
1556 static void qs8_gemm_4x4c2s4__avx_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__avx_ld128() argument
1557 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__avx_ld128()
1561 static void qs8_gemm_xw_2x4c2s4__avx(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2s4__avx() argument
1562 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2s4__avx, 2, 4, 2, 4, in qs8_gemm_xw_2x4c2s4__avx()
1565 static void qs8_gemm_xw_3x4c2s4__avx(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2s4__avx() argument
1566 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__avx, 3, 4, 2, 4, in qs8_gemm_xw_3x4c2s4__avx()
1569 static void qs8_gemm_xw_4x4c2s4__avx(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2s4__avx() argument
1570 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2s4__avx, 4, 4, 2, 4, in qs8_gemm_xw_4x4c2s4__avx()
1574 static void qs8_gemm_2x4c8__avx_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__avx_ld64() argument
1575 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, 2, 4, 8, 1, in qs8_gemm_2x4c8__avx_ld64()
1578 static void qs8_gemm_3x4c8__avx_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__avx_ld64() argument
1579 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, 3, 4, 8, 1, in qs8_gemm_3x4c8__avx_ld64()
1583 static void qs8_gemm_2x4c8__avx_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__avx_ld128() argument
1584 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, 2, 4, 8, 1, in qs8_gemm_2x4c8__avx_ld128()
1587 static void qs8_gemm_3x4c8__avx_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__avx_ld128() argument
1588 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, 3, 4, 8, 1, in qs8_gemm_3x4c8__avx_ld128()
1592 static void qs8_gemm_xw_2x4c8__avx(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__avx() argument
1593 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, 2, 4, 8, 1, in qs8_gemm_xw_2x4c8__avx()
1596 static void qs8_gemm_xw_3x4c8__avx(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__avx() argument
1597 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, 3, 4, 8, 1, in qs8_gemm_xw_3x4c8__avx()
1601 static void qs8_gemm_2x4c2__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__sse41_ld64() argument
1602 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, 2, 4, 2, 1, in qs8_gemm_2x4c2__sse41_ld64()
1605 static void qs8_gemm_3x4c2__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__sse41_ld64() argument
1606 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, 3, 4, 2, 1, in qs8_gemm_3x4c2__sse41_ld64()
1609 static void qs8_gemm_4x4c2__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__sse41_ld64() argument
1610 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, 4, 4, 2, 1, in qs8_gemm_4x4c2__sse41_ld64()
1614 static void qs8_gemm_2x4c2__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__sse41_ld128() argument
1615 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, 2, 4, 2, 1, in qs8_gemm_2x4c2__sse41_ld128()
1618 static void qs8_gemm_3x4c2__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__sse41_ld128() argument
1619 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, 3, 4, 2, 1, in qs8_gemm_3x4c2__sse41_ld128()
1622 static void qs8_gemm_4x4c2__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__sse41_ld128() argument
1623 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, 4, 4, 2, 1, in qs8_gemm_4x4c2__sse41_ld128()
1627 static void qs8_gemm_xw_2x4c2__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2__sse41() argument
1628 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse41, 2, 4, 2, 1, in qs8_gemm_xw_2x4c2__sse41()
1631 static void qs8_gemm_xw_3x4c2__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2__sse41() argument
1632 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, 3, 4, 2, 1, in qs8_gemm_xw_3x4c2__sse41()
1635 static void qs8_gemm_xw_4x4c2__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__sse41() argument
1636 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__sse41, 4, 4, 2, 1, in qs8_gemm_xw_4x4c2__sse41()
1640 static void qs8_gemm_2x4c2s4__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__sse41_ld64() argument
1641 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__sse41_ld64()
1644 static void qs8_gemm_3x4c2s4__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__sse41_ld64() argument
1645 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__sse41_ld64()
1648 static void qs8_gemm_4x4c2s4__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__sse41_ld64() argument
1649 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__sse41_ld64()
1653 static void qs8_gemm_2x4c2s4__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__sse41_ld128() argument
1654 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__sse41_ld128()
1657 static void qs8_gemm_3x4c2s4__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__sse41_ld128() argument
1658 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__sse41_ld128()
1661 static void qs8_gemm_4x4c2s4__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__sse41_ld128() argument
1662 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__sse41_ld128()
1666 static void qs8_gemm_xw_2x4c2s4__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2s4__sse41() argument
1667 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2s4__sse41, 2, 4, 2, 4, in qs8_gemm_xw_2x4c2s4__sse41()
1670 static void qs8_gemm_xw_3x4c2s4__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2s4__sse41() argument
1671 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse41, 3, 4, 2, 4, in qs8_gemm_xw_3x4c2s4__sse41()
1674 static void qs8_gemm_xw_4x4c2s4__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2s4__sse41() argument
1675 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2s4__sse41, 4, 4, 2, 4, in qs8_gemm_xw_4x4c2s4__sse41()
1679 static void qs8_gemm_2x4c8__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__sse41_ld64() argument
1680 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, 2, 4, 8, 1, in qs8_gemm_2x4c8__sse41_ld64()
1683 static void qs8_gemm_3x4c8__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__sse41_ld64() argument
1684 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, 3, 4, 8, 1, in qs8_gemm_3x4c8__sse41_ld64()
1688 static void qs8_gemm_2x4c8__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__sse41_ld128() argument
1689 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, 2, 4, 8, 1, in qs8_gemm_2x4c8__sse41_ld128()
1692 static void qs8_gemm_3x4c8__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__sse41_ld128() argument
1693 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, 3, 4, 8, 1, in qs8_gemm_3x4c8__sse41_ld128()
1697 static void qs8_gemm_xw_2x4c8__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__sse41() argument
1698 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse41, 2, 4, 8, 1, in qs8_gemm_xw_2x4c8__sse41()
1701 static void qs8_gemm_xw_3x4c8__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__sse41() argument
1702 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, 3, 4, 8, 1, in qs8_gemm_xw_3x4c8__sse41()
1706 static void qs8_gemm_2x4c8__ssse3_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__ssse3_ld64() argument
1707 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, 2, 4, 8, 1, in qs8_gemm_2x4c8__ssse3_ld64()
1710 static void qs8_gemm_3x4c8__ssse3_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__ssse3_ld64() argument
1711 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, 3, 4, 8, 1, in qs8_gemm_3x4c8__ssse3_ld64()
1715 static void qs8_gemm_2x4c8__ssse3_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__ssse3_ld128() argument
1716 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, 2, 4, 8, 1, in qs8_gemm_2x4c8__ssse3_ld128()
1719 static void qs8_gemm_3x4c8__ssse3_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__ssse3_ld128() argument
1720 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, 3, 4, 8, 1, in qs8_gemm_3x4c8__ssse3_ld128()
1724 static void qs8_gemm_xw_2x4c8__ssse3(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__ssse3() argument
1725 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, 2, 4, 8, 1, in qs8_gemm_xw_2x4c8__ssse3()
1728 static void qs8_gemm_xw_3x4c8__ssse3(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__ssse3() argument
1729 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__ssse3, 3, 4, 8, 1, in qs8_gemm_xw_3x4c8__ssse3()
1733 static void qs8_gemm_2x4c2__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__sse2_ld64() argument
1734 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, 2, 4, 2, 1, in qs8_gemm_2x4c2__sse2_ld64()
1737 static void qs8_gemm_3x4c2__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__sse2_ld64() argument
1738 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, 3, 4, 2, 1, in qs8_gemm_3x4c2__sse2_ld64()
1741 static void qs8_gemm_4x4c2__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__sse2_ld64() argument
1742 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, 4, 4, 2, 1, in qs8_gemm_4x4c2__sse2_ld64()
1746 static void qs8_gemm_2x4c2__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__sse2_ld128() argument
1747 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, 2, 4, 2, 1, in qs8_gemm_2x4c2__sse2_ld128()
1750 static void qs8_gemm_3x4c2__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__sse2_ld128() argument
1751 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, 3, 4, 2, 1, in qs8_gemm_3x4c2__sse2_ld128()
1754 static void qs8_gemm_4x4c2__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__sse2_ld128() argument
1755 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, 4, 4, 2, 1, in qs8_gemm_4x4c2__sse2_ld128()
1759 static void qs8_gemm_xw_2x4c2__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2__sse2() argument
1760 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, 2, 4, 2, 1, in qs8_gemm_xw_2x4c2__sse2()
1763 static void qs8_gemm_xw_3x4c2__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2__sse2() argument
1764 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, 3, 4, 2, 1, in qs8_gemm_xw_3x4c2__sse2()
1767 static void qs8_gemm_xw_4x4c2__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__sse2() argument
1768 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__sse2, 4, 4, 2, 1, in qs8_gemm_xw_4x4c2__sse2()
1772 static void qs8_gemm_2x4c2s4__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__sse2_ld64() argument
1773 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__sse2_ld64()
1776 static void qs8_gemm_3x4c2s4__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__sse2_ld64() argument
1777 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__sse2_ld64()
1780 static void qs8_gemm_4x4c2s4__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__sse2_ld64() argument
1781 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__sse2_ld64()
1785 static void qs8_gemm_2x4c2s4__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__sse2_ld128() argument
1786 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__sse2_ld128()
1789 static void qs8_gemm_3x4c2s4__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__sse2_ld128() argument
1790 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__sse2_ld128()
1793 static void qs8_gemm_4x4c2s4__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__sse2_ld128() argument
1794 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__sse2_ld128()
1798 static void qs8_gemm_xw_2x4c2s4__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2s4__sse2() argument
1799 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2s4__sse2, 2, 4, 2, 4, in qs8_gemm_xw_2x4c2s4__sse2()
1802 static void qs8_gemm_xw_3x4c2s4__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2s4__sse2() argument
1803 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse2, 3, 4, 2, 4, in qs8_gemm_xw_3x4c2s4__sse2()
1806 static void qs8_gemm_xw_4x4c2s4__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2s4__sse2() argument
1807 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2s4__sse2, 4, 4, 2, 4, in qs8_gemm_xw_4x4c2s4__sse2()
1811 static void qs8_gemm_2x4c8__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__sse2_ld64() argument
1812 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, 2, 4, 8, 1, in qs8_gemm_2x4c8__sse2_ld64()
1815 static void qs8_gemm_3x4c8__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__sse2_ld64() argument
1816 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, 3, 4, 8, 1, in qs8_gemm_3x4c8__sse2_ld64()
1820 static void qs8_gemm_2x4c8__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__sse2_ld128() argument
1821 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, 2, 4, 8, 1, in qs8_gemm_2x4c8__sse2_ld128()
1824 static void qs8_gemm_3x4c8__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__sse2_ld128() argument
1825 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, 3, 4, 8, 1, in qs8_gemm_3x4c8__sse2_ld128()
1829 static void qs8_gemm_xw_2x4c8__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__sse2() argument
1830 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, 2, 4, 8, 1, in qs8_gemm_xw_2x4c8__sse2()
1833 static void qs8_gemm_xw_3x4c8__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__sse2() argument
1834 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, 3, 4, 8, 1, in qs8_gemm_xw_3x4c8__sse2()
1921 static void qs8_gemm_2x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
1922 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, 2, 4, 2, 1, in BENCHMARK_GEMM()
1925 static void qs8_gemm_3x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__wasmsimd_dot16x2_ld64() argument
1926 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, 3, 4, 2, 1, in qs8_gemm_3x4c2__wasmsimd_dot16x2_ld64()
1929 static void qs8_gemm_4x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__wasmsimd_dot16x2_ld64() argument
1930 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, 4, 4, 2, 1, in qs8_gemm_4x4c2__wasmsimd_dot16x2_ld64()
1934 static void qs8_gemm_2x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2__wasmsimd_dot16x2_ld128() argument
1935 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, 2, 4, 2, 1, in qs8_gemm_2x4c2__wasmsimd_dot16x2_ld128()
1938 static void qs8_gemm_3x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2__wasmsimd_dot16x2_ld128() argument
1939 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, 3, 4, 2, 1, in qs8_gemm_3x4c2__wasmsimd_dot16x2_ld128()
1942 static void qs8_gemm_4x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__wasmsimd_dot16x2_ld128() argument
1943 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, 4, 4, 2, 1, in qs8_gemm_4x4c2__wasmsimd_dot16x2_ld128()
1947 static void qs8_gemm_xw_2x4c2__wasmsimd_dot16x2(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c2__wasmsimd_dot16x2() argument
1948 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2, 2, 4, 2, 1, in qs8_gemm_xw_2x4c2__wasmsimd_dot16x2()
1951 static void qs8_gemm_xw_3x4c2__wasmsimd_dot16x2(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c2__wasmsimd_dot16x2() argument
1952 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2, 3, 4, 2, 1, in qs8_gemm_xw_3x4c2__wasmsimd_dot16x2()
1955 static void qs8_gemm_xw_4x4c2__wasmsimd_dot16x2(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__wasmsimd_dot16x2() argument
1956 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2, 4, 4, 2, 1, in qs8_gemm_xw_4x4c2__wasmsimd_dot16x2()
1960 static void qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64() argument
1961 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64()
1964 static void qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64() argument
1965 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64()
1968 static void qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64() argument
1969 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64()
1973 static void qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128() argument
1974 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, 2, 4, 2, 4, in qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128()
1977 static void qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128() argument
1978 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, 3, 4, 2, 4, in qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128()
1981 static void qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128() argument
1982 … GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, 4, 4, 2, 4, in qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128()
1986 static void qs8_gemm_2x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__wasmsimd_dot16x2_ld64() argument
1987 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, 2, 4, 8, 1, in qs8_gemm_2x4c8__wasmsimd_dot16x2_ld64()
1990 static void qs8_gemm_3x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__wasmsimd_dot16x2_ld64() argument
1991 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, 3, 4, 8, 1, in qs8_gemm_3x4c8__wasmsimd_dot16x2_ld64()
1994 static void qs8_gemm_4x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c8__wasmsimd_dot16x2_ld64() argument
1995 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, 4, 4, 8, 1, in qs8_gemm_4x4c8__wasmsimd_dot16x2_ld64()
1999 static void qs8_gemm_2x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__wasmsimd_dot16x2_ld128() argument
2000 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, 2, 4, 8, 1, in qs8_gemm_2x4c8__wasmsimd_dot16x2_ld128()
2003 static void qs8_gemm_3x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__wasmsimd_dot16x2_ld128() argument
2004 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, 3, 4, 8, 1, in qs8_gemm_3x4c8__wasmsimd_dot16x2_ld128()
2007 static void qs8_gemm_4x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c8__wasmsimd_dot16x2_ld128() argument
2008 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, 4, 4, 8, 1, in qs8_gemm_4x4c8__wasmsimd_dot16x2_ld128()
2012 static void qs8_gemm_xw_2x4c8__wasmsimd_dot16x2(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__wasmsimd_dot16x2() argument
2013 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2, 2, 4, 8, 1, in qs8_gemm_xw_2x4c8__wasmsimd_dot16x2()
2016 static void qs8_gemm_xw_3x4c8__wasmsimd_dot16x2(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__wasmsimd_dot16x2() argument
2017 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2, 3, 4, 8, 1, in qs8_gemm_xw_3x4c8__wasmsimd_dot16x2()
2020 static void qs8_gemm_xw_4x4c8__wasmsimd_dot16x2(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c8__wasmsimd_dot16x2() argument
2021 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2, 4, 4, 8, 1, in qs8_gemm_xw_4x4c8__wasmsimd_dot16x2()
2055 static void qs8_gemm_2x2__wasm_fmagic(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
2056 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, 2, 2, 1, 1, in BENCHMARK_GEMM()
2059 static void qs8_gemm_3x2__wasm_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_3x2__wasm_fmagic() argument
2060 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, 3, 2, 1, 1, in qs8_gemm_3x2__wasm_fmagic()
2063 static void qs8_gemm_4x2__wasm_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_4x2__wasm_fmagic() argument
2064 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, 4, 2, 1, 1, in qs8_gemm_4x2__wasm_fmagic()
2067 static void qs8_gemm_2x4__wasm_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_2x4__wasm_fmagic() argument
2068 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, 2, 4, 1, 1, in qs8_gemm_2x4__wasm_fmagic()
2071 static void qs8_gemm_3x4__wasm_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_3x4__wasm_fmagic() argument
2072 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, 3, 4, 1, 1, in qs8_gemm_3x4__wasm_fmagic()
2075 static void qs8_gemm_4x4__wasm_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_4x4__wasm_fmagic() argument
2076 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, 4, 4, 1, 1, in qs8_gemm_4x4__wasm_fmagic()
2089 static void qs8_gemm_2x2__scalar_fmagic(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
2090 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, 2, 2, 1, 1, in BENCHMARK_GEMM()
2093 static void qs8_gemm_3x2__scalar_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_3x2__scalar_fmagic() argument
2094 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, 3, 2, 1, 1, in qs8_gemm_3x2__scalar_fmagic()
2097 static void qs8_gemm_4x2__scalar_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_4x2__scalar_fmagic() argument
2098 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic, 4, 2, 1, 1, in qs8_gemm_4x2__scalar_fmagic()
2101 static void qs8_gemm_2x4__scalar_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_2x4__scalar_fmagic() argument
2102 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, 2, 4, 1, 1, in qs8_gemm_2x4__scalar_fmagic()
2105 static void qs8_gemm_3x4__scalar_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_3x4__scalar_fmagic() argument
2106 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic, 3, 4, 1, 1, in qs8_gemm_3x4__scalar_fmagic()
2109 static void qs8_gemm_4x4__scalar_fmagic(benchmark::State& state, const char* net) { in qs8_gemm_4x4__scalar_fmagic() argument
2110 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic, 4, 4, 1, 1, in qs8_gemm_4x4__scalar_fmagic()
2114 static void qs8_gemm_2x2__scalar_imagic(benchmark::State& state, const char* net) { in qs8_gemm_2x2__scalar_imagic() argument
2115 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, 2, 2, 1, 1, in qs8_gemm_2x2__scalar_imagic()
2118 static void qs8_gemm_3x2__scalar_imagic(benchmark::State& state, const char* net) { in qs8_gemm_3x2__scalar_imagic() argument
2119 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, 3, 2, 1, 1, in qs8_gemm_3x2__scalar_imagic()
2122 static void qs8_gemm_4x2__scalar_imagic(benchmark::State& state, const char* net) { in qs8_gemm_4x2__scalar_imagic() argument
2123 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, 4, 2, 1, 1, in qs8_gemm_4x2__scalar_imagic()
2126 static void qs8_gemm_2x4__scalar_imagic(benchmark::State& state, const char* net) { in qs8_gemm_2x4__scalar_imagic() argument
2127 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, 2, 4, 1, 1, in qs8_gemm_2x4__scalar_imagic()
2130 static void qs8_gemm_3x4__scalar_imagic(benchmark::State& state, const char* net) { in qs8_gemm_3x4__scalar_imagic() argument
2131 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, 3, 4, 1, 1, in qs8_gemm_3x4__scalar_imagic()
2134 static void qs8_gemm_4x4__scalar_imagic(benchmark::State& state, const char* net) { in qs8_gemm_4x4__scalar_imagic() argument
2135 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, 4, 4, 1, 1, in qs8_gemm_4x4__scalar_imagic()
2139 static void qs8_gemm_2x2__scalar_lrintf(benchmark::State& state, const char* net) { in qs8_gemm_2x2__scalar_lrintf() argument
2140 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, 2, 2, 1, 1, in qs8_gemm_2x2__scalar_lrintf()
2143 static void qs8_gemm_3x2__scalar_lrintf(benchmark::State& state, const char* net) { in qs8_gemm_3x2__scalar_lrintf() argument
2144 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, 3, 2, 1, 1, in qs8_gemm_3x2__scalar_lrintf()
2147 static void qs8_gemm_4x2__scalar_lrintf(benchmark::State& state, const char* net) { in qs8_gemm_4x2__scalar_lrintf() argument
2148 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, 4, 2, 1, 1, in qs8_gemm_4x2__scalar_lrintf()
2151 static void qs8_gemm_2x4__scalar_lrintf(benchmark::State& state, const char* net) { in qs8_gemm_2x4__scalar_lrintf() argument
2152 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, 2, 4, 1, 1, in qs8_gemm_2x4__scalar_lrintf()
2155 static void qs8_gemm_3x4__scalar_lrintf(benchmark::State& state, const char* net) { in qs8_gemm_3x4__scalar_lrintf() argument
2156 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, 3, 4, 1, 1, in qs8_gemm_3x4__scalar_lrintf()
2159 static void qs8_gemm_4x4__scalar_lrintf(benchmark::State& state, const char* net) { in qs8_gemm_4x4__scalar_lrintf() argument
2160 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, 4, 4, 1, 1, in qs8_gemm_4x4__scalar_lrintf()
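Several hits are attributed to BENCHMARK_GEMM(), the macro that turns each of these static functions into registered Google Benchmark cases. Its expansion is not visible in this listing; the sketch below is a plausible registration pattern, with the argument generator, GEMM shape, and placeholder benchmark body named purely as assumptions rather than the actual macro contents.

    #include <benchmark/benchmark.h>

    // Hypothetical stand-ins (names are assumptions, not the benchmark's real helpers):
    static void MobileNetV2GemmArguments(benchmark::internal::Benchmark* b) {
      b->Args({/*M=*/12544, /*N=*/24, /*K=*/64});  // one illustrative GEMM shape
    }
    static void qs8_gemm_4x4__scalar_lrintf(benchmark::State& state, const char* net) {
      for (auto _ : state) { benchmark::DoNotOptimize(net); }  // placeholder body
    }

    // Plausible expansion of BENCHMARK_GEMM (assumption, not the actual macro body):
    #define BENCHMARK_GEMM(gemm_fn)                             \
      BENCHMARK_CAPTURE(gemm_fn, mobilenet_v2, "MobileNet v2")  \
          ->Apply(MobileNetV2GemmArguments)                     \
          ->UseRealTime();
    BENCHMARK_GEMM(qs8_gemm_4x4__scalar_lrintf)
    BENCHMARK_MAIN();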