xref: /aosp_15_r20/external/XNNPACK/bench/f32-gemm.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8*4bdc9457SAndroid Build Coastguard Worker 
9*4bdc9457SAndroid Build Coastguard Worker #include <algorithm>
10*4bdc9457SAndroid Build Coastguard Worker #include <cfloat>
11*4bdc9457SAndroid Build Coastguard Worker #include <chrono>
12*4bdc9457SAndroid Build Coastguard Worker #include <cmath>
13*4bdc9457SAndroid Build Coastguard Worker #include <functional>
14*4bdc9457SAndroid Build Coastguard Worker #include <mutex>
15*4bdc9457SAndroid Build Coastguard Worker #include <random>
16*4bdc9457SAndroid Build Coastguard Worker #include <vector>
17*4bdc9457SAndroid Build Coastguard Worker 
18*4bdc9457SAndroid Build Coastguard Worker #include <benchmark/benchmark.h>
19*4bdc9457SAndroid Build Coastguard Worker #ifdef BENCHMARK_RUY
20*4bdc9457SAndroid Build Coastguard Worker #include "ruy/ruy.h"
21*4bdc9457SAndroid Build Coastguard Worker #endif  // BENCHMARK_RUY
22*4bdc9457SAndroid Build Coastguard Worker #include "bench/gemm.h"
23*4bdc9457SAndroid Build Coastguard Worker #include "bench/utils.h"
24*4bdc9457SAndroid Build Coastguard Worker 
25*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack.h>
26*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/aligned-allocator.h>
27*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/allocator.h>
28*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
29*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/gemm.h>
30*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h>
31*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microfnptr.h>
32*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microparams-init.h>
33*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/pack.h>
34*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/packx.h>
35*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/ppmm.h>
36*4bdc9457SAndroid Build Coastguard Worker 
37*4bdc9457SAndroid Build Coastguard Worker 
GEMMBenchmark(benchmark::State & state,xnn_f32_gemm_minmax_ukernel_function gemm,size_t mr,size_t nr,size_t kr,size_t sr,xnn_init_f32_minmax_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)38*4bdc9457SAndroid Build Coastguard Worker static void GEMMBenchmark(benchmark::State& state,
39*4bdc9457SAndroid Build Coastguard Worker   xnn_f32_gemm_minmax_ukernel_function gemm,
40*4bdc9457SAndroid Build Coastguard Worker   size_t mr, size_t nr, size_t kr, size_t sr,
41*4bdc9457SAndroid Build Coastguard Worker   xnn_init_f32_minmax_params_fn init_params,
42*4bdc9457SAndroid Build Coastguard Worker   benchmark::utils::IsaCheckFunction isa_check = nullptr)
43*4bdc9457SAndroid Build Coastguard Worker {
44*4bdc9457SAndroid Build Coastguard Worker   if (isa_check && !isa_check(state)) {
45*4bdc9457SAndroid Build Coastguard Worker     return;
46*4bdc9457SAndroid Build Coastguard Worker   }
47*4bdc9457SAndroid Build Coastguard Worker 
48*4bdc9457SAndroid Build Coastguard Worker   const size_t mc = state.range(0);
49*4bdc9457SAndroid Build Coastguard Worker   const size_t nc = state.range(1);
50*4bdc9457SAndroid Build Coastguard Worker   const size_t kc = state.range(2);
51*4bdc9457SAndroid Build Coastguard Worker 
52*4bdc9457SAndroid Build Coastguard Worker   const size_t nc_stride = benchmark::utils::RoundUp(nc, nr);
53*4bdc9457SAndroid Build Coastguard Worker   const size_t kc_stride = benchmark::utils::RoundUp(kc, kr * sr);
54*4bdc9457SAndroid Build Coastguard Worker 
55*4bdc9457SAndroid Build Coastguard Worker   std::random_device random_device;
56*4bdc9457SAndroid Build Coastguard Worker   auto rng = std::mt19937(random_device());
57*4bdc9457SAndroid Build Coastguard Worker   auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
58*4bdc9457SAndroid Build Coastguard Worker 
59*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> a(mc * kc + XNN_EXTRA_BYTES / sizeof(float));
60*4bdc9457SAndroid Build Coastguard Worker   std::generate(a.begin(), a.end(), std::ref(f32rng));
61*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> k(nc * kc);
62*4bdc9457SAndroid Build Coastguard Worker   std::generate(k.begin(), k.end(), std::ref(f32rng));
63*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> b(nc);
64*4bdc9457SAndroid Build Coastguard Worker   std::generate(b.begin(), b.end(), std::ref(f32rng));
65*4bdc9457SAndroid Build Coastguard Worker 
66*4bdc9457SAndroid Build Coastguard Worker   const size_t w_elements = nc_stride * kc_stride + nc_stride;
67*4bdc9457SAndroid Build Coastguard Worker   const size_t c_elements = mc * nc;
68*4bdc9457SAndroid Build Coastguard Worker   const size_t num_buffers = 1 +
69*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
70*4bdc9457SAndroid Build Coastguard Worker       sizeof(float) * (w_elements + c_elements));
71*4bdc9457SAndroid Build Coastguard Worker 
72*4bdc9457SAndroid Build Coastguard Worker   std::vector<float, AlignedAllocator<float, 64>> w(w_elements * num_buffers);
73*4bdc9457SAndroid Build Coastguard Worker   std::fill(w.begin(), w.end(), 0.0f);
74*4bdc9457SAndroid Build Coastguard Worker   xnn_pack_f32_gemm_goi_w(1 /* groups */, nc, kc, nr, kr, sr, k.data(), b.data(), w.data(), 0, nullptr);
75*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> c(c_elements * num_buffers);
76*4bdc9457SAndroid Build Coastguard Worker   std::fill(c.begin(), c.end(), std::nanf(""));
77*4bdc9457SAndroid Build Coastguard Worker 
78*4bdc9457SAndroid Build Coastguard Worker   xnn_f32_minmax_params params;
79*4bdc9457SAndroid Build Coastguard Worker   init_params(&params,
80*4bdc9457SAndroid Build Coastguard Worker     -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
81*4bdc9457SAndroid Build Coastguard Worker 
82*4bdc9457SAndroid Build Coastguard Worker   size_t buffer_index = 0;
83*4bdc9457SAndroid Build Coastguard Worker   for (auto _ : state) {
84*4bdc9457SAndroid Build Coastguard Worker     // Use circular buffers (exceeding cache size) and prefetch to control cache state:
85*4bdc9457SAndroid Build Coastguard Worker     // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
86*4bdc9457SAndroid Build Coastguard Worker     // - W is not in cache (for any cache level)
87*4bdc9457SAndroid Build Coastguard Worker     // - C is not in cache (for any cache level)
88*4bdc9457SAndroid Build Coastguard Worker     state.PauseTiming();
89*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
90*4bdc9457SAndroid Build Coastguard Worker     buffer_index = (buffer_index + 1) % num_buffers;
91*4bdc9457SAndroid Build Coastguard Worker     state.ResumeTiming();
92*4bdc9457SAndroid Build Coastguard Worker 
93*4bdc9457SAndroid Build Coastguard Worker     for (uint32_t m = 0; m < mc; m += mr) {
94*4bdc9457SAndroid Build Coastguard Worker       const uint32_t mb = min(mc - m, mr);
95*4bdc9457SAndroid Build Coastguard Worker       gemm(
96*4bdc9457SAndroid Build Coastguard Worker         mb, nc, kc * sizeof(float),
97*4bdc9457SAndroid Build Coastguard Worker         a.data() + m * kc, kc * sizeof(float),
98*4bdc9457SAndroid Build Coastguard Worker         w.data() + buffer_index * nc_stride * (kc_stride + 1),
99*4bdc9457SAndroid Build Coastguard Worker         c.data() + (buffer_index * mc + m) * nc, nc * sizeof(float), nr * sizeof(float),
100*4bdc9457SAndroid Build Coastguard Worker         &params);
101*4bdc9457SAndroid Build Coastguard Worker     }
102*4bdc9457SAndroid Build Coastguard Worker   }
103*4bdc9457SAndroid Build Coastguard Worker 
104*4bdc9457SAndroid Build Coastguard Worker   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
105*4bdc9457SAndroid Build Coastguard Worker   if (cpu_frequency != 0) {
106*4bdc9457SAndroid Build Coastguard Worker     state.counters["cpufreq"] = cpu_frequency;
107*4bdc9457SAndroid Build Coastguard Worker   }
108*4bdc9457SAndroid Build Coastguard Worker 
109*4bdc9457SAndroid Build Coastguard Worker   state.counters["FLOPS"] = benchmark::Counter(
110*4bdc9457SAndroid Build Coastguard Worker     uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
111*4bdc9457SAndroid Build Coastguard Worker }
112*4bdc9457SAndroid Build Coastguard Worker 
PPMM1PBenchmark(benchmark::State & state,xnn_f32_ppmm_minmax_ukernel_function ppmm,xnn_x32_packx_ukernel_function packx,size_t mr,size_t nr,xnn_init_f32_minmax_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)113*4bdc9457SAndroid Build Coastguard Worker static void PPMM1PBenchmark(benchmark::State& state,
114*4bdc9457SAndroid Build Coastguard Worker   xnn_f32_ppmm_minmax_ukernel_function ppmm,
115*4bdc9457SAndroid Build Coastguard Worker   xnn_x32_packx_ukernel_function packx,
116*4bdc9457SAndroid Build Coastguard Worker   size_t mr, size_t nr,
117*4bdc9457SAndroid Build Coastguard Worker   xnn_init_f32_minmax_params_fn init_params,
118*4bdc9457SAndroid Build Coastguard Worker   benchmark::utils::IsaCheckFunction isa_check = nullptr)
119*4bdc9457SAndroid Build Coastguard Worker {
120*4bdc9457SAndroid Build Coastguard Worker   if (isa_check && !isa_check(state)) {
121*4bdc9457SAndroid Build Coastguard Worker     return;
122*4bdc9457SAndroid Build Coastguard Worker   }
123*4bdc9457SAndroid Build Coastguard Worker 
124*4bdc9457SAndroid Build Coastguard Worker   const size_t mc = state.range(0);
125*4bdc9457SAndroid Build Coastguard Worker   const size_t nc = state.range(1);
126*4bdc9457SAndroid Build Coastguard Worker   const size_t kc = state.range(2);
127*4bdc9457SAndroid Build Coastguard Worker 
128*4bdc9457SAndroid Build Coastguard Worker   const size_t nc_stride = benchmark::utils::RoundUp(nc, nr);
129*4bdc9457SAndroid Build Coastguard Worker 
130*4bdc9457SAndroid Build Coastguard Worker   std::random_device random_device;
131*4bdc9457SAndroid Build Coastguard Worker   auto rng = std::mt19937(random_device());
132*4bdc9457SAndroid Build Coastguard Worker   auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
133*4bdc9457SAndroid Build Coastguard Worker 
134*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> a(mc * kc + XNN_EXTRA_BYTES / sizeof(float));
135*4bdc9457SAndroid Build Coastguard Worker   std::generate(a.begin(), a.end(), std::ref(f32rng));
136*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> k(nc * kc);
137*4bdc9457SAndroid Build Coastguard Worker   std::generate(k.begin(), k.end(), std::ref(f32rng));
138*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> b(nc);
139*4bdc9457SAndroid Build Coastguard Worker   std::generate(b.begin(), b.end(), std::ref(f32rng));
140*4bdc9457SAndroid Build Coastguard Worker 
141*4bdc9457SAndroid Build Coastguard Worker   std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> t(mr * kc);
142*4bdc9457SAndroid Build Coastguard Worker 
143*4bdc9457SAndroid Build Coastguard Worker   const size_t w_elements = nc_stride * kc + nc_stride;
144*4bdc9457SAndroid Build Coastguard Worker   const size_t c_elements = mc * nc;
145*4bdc9457SAndroid Build Coastguard Worker   const size_t num_buffers = 1 +
146*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
147*4bdc9457SAndroid Build Coastguard Worker       sizeof(float) * (w_elements + c_elements));
148*4bdc9457SAndroid Build Coastguard Worker 
149*4bdc9457SAndroid Build Coastguard Worker   std::vector<float, AlignedAllocator<float, 64>> w(w_elements * num_buffers);
150*4bdc9457SAndroid Build Coastguard Worker   std::fill(w.begin(), w.end(), 0.0f);
151*4bdc9457SAndroid Build Coastguard Worker   xnn_pack_f32_gemm_goi_w(1 /* groups */, nc, kc, nr, 1 /* kr */, 1 /* sr */, k.data(), b.data(), w.data(), 0, nullptr);
152*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> c(c_elements * num_buffers);
153*4bdc9457SAndroid Build Coastguard Worker   std::fill(c.begin(), c.end(), std::nanf(""));
154*4bdc9457SAndroid Build Coastguard Worker 
155*4bdc9457SAndroid Build Coastguard Worker   xnn_f32_minmax_params params;
156*4bdc9457SAndroid Build Coastguard Worker   init_params(&params,
157*4bdc9457SAndroid Build Coastguard Worker     -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
158*4bdc9457SAndroid Build Coastguard Worker 
159*4bdc9457SAndroid Build Coastguard Worker   size_t buffer_index = 0;
160*4bdc9457SAndroid Build Coastguard Worker   for (auto _ : state) {
161*4bdc9457SAndroid Build Coastguard Worker     // Use circular buffers (exceeding cache size) and prefetch to control cache state:
162*4bdc9457SAndroid Build Coastguard Worker     // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
163*4bdc9457SAndroid Build Coastguard Worker     // - W is not in cache (for any cache level)
164*4bdc9457SAndroid Build Coastguard Worker     // - C is not in cache (for any cache level)
165*4bdc9457SAndroid Build Coastguard Worker     state.PauseTiming();
166*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
167*4bdc9457SAndroid Build Coastguard Worker     buffer_index = (buffer_index + 1) % num_buffers;
168*4bdc9457SAndroid Build Coastguard Worker     state.ResumeTiming();
169*4bdc9457SAndroid Build Coastguard Worker 
170*4bdc9457SAndroid Build Coastguard Worker     for (uint32_t m = 0; m < mc; m += mr) {
171*4bdc9457SAndroid Build Coastguard Worker       const uint32_t mb = min(mc - m, mr);
172*4bdc9457SAndroid Build Coastguard Worker       packx(mb, kc, reinterpret_cast<const uint32_t*>(a.data() + m * kc), kc, t.data());
173*4bdc9457SAndroid Build Coastguard Worker       ppmm(
174*4bdc9457SAndroid Build Coastguard Worker         mb, nc, kc * sizeof(float),
175*4bdc9457SAndroid Build Coastguard Worker         reinterpret_cast<const float*>(t.data()),
176*4bdc9457SAndroid Build Coastguard Worker         w.data() + nc_stride * buffer_index * (kc + 1),
177*4bdc9457SAndroid Build Coastguard Worker         c.data() + (mc * buffer_index + m) * nc, nc * sizeof(float), nr * sizeof(float),
178*4bdc9457SAndroid Build Coastguard Worker         &params);
179*4bdc9457SAndroid Build Coastguard Worker     }
180*4bdc9457SAndroid Build Coastguard Worker   }
181*4bdc9457SAndroid Build Coastguard Worker 
182*4bdc9457SAndroid Build Coastguard Worker   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
183*4bdc9457SAndroid Build Coastguard Worker   if (cpu_frequency != 0) {
184*4bdc9457SAndroid Build Coastguard Worker     state.counters["cpufreq"] = cpu_frequency;
185*4bdc9457SAndroid Build Coastguard Worker   }
186*4bdc9457SAndroid Build Coastguard Worker 
187*4bdc9457SAndroid Build Coastguard Worker   state.counters["FLOPS"] = benchmark::Counter(
188*4bdc9457SAndroid Build Coastguard Worker     uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
189*4bdc9457SAndroid Build Coastguard Worker }
190*4bdc9457SAndroid Build Coastguard Worker 
PPMM2PBenchmark(benchmark::State & state,xnn_f32_ppmm_minmax_ukernel_function ppmm,xnn_x32_packx_ukernel_function packx,size_t mr,size_t nr,xnn_init_f32_minmax_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)191*4bdc9457SAndroid Build Coastguard Worker static void PPMM2PBenchmark(benchmark::State& state,
192*4bdc9457SAndroid Build Coastguard Worker   xnn_f32_ppmm_minmax_ukernel_function ppmm,
193*4bdc9457SAndroid Build Coastguard Worker   xnn_x32_packx_ukernel_function packx,
194*4bdc9457SAndroid Build Coastguard Worker   size_t mr, size_t nr,
195*4bdc9457SAndroid Build Coastguard Worker   xnn_init_f32_minmax_params_fn init_params,
196*4bdc9457SAndroid Build Coastguard Worker   benchmark::utils::IsaCheckFunction isa_check = nullptr)
197*4bdc9457SAndroid Build Coastguard Worker {
198*4bdc9457SAndroid Build Coastguard Worker   if (isa_check && !isa_check(state)) {
199*4bdc9457SAndroid Build Coastguard Worker     return;
200*4bdc9457SAndroid Build Coastguard Worker   }
201*4bdc9457SAndroid Build Coastguard Worker 
202*4bdc9457SAndroid Build Coastguard Worker   const size_t mc = state.range(0);
203*4bdc9457SAndroid Build Coastguard Worker   const size_t nc = state.range(1);
204*4bdc9457SAndroid Build Coastguard Worker   const size_t kc = state.range(2);
205*4bdc9457SAndroid Build Coastguard Worker 
206*4bdc9457SAndroid Build Coastguard Worker   const size_t mc_stride = benchmark::utils::RoundUp(mc, mr);
207*4bdc9457SAndroid Build Coastguard Worker   const size_t nc_stride = benchmark::utils::RoundUp(nc, nr);
208*4bdc9457SAndroid Build Coastguard Worker 
209*4bdc9457SAndroid Build Coastguard Worker   std::random_device random_device;
210*4bdc9457SAndroid Build Coastguard Worker   auto rng = std::mt19937(random_device());
211*4bdc9457SAndroid Build Coastguard Worker   auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
212*4bdc9457SAndroid Build Coastguard Worker 
213*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> a(mc * kc + XNN_EXTRA_BYTES / sizeof(float));
214*4bdc9457SAndroid Build Coastguard Worker   std::generate(a.begin(), a.end(), std::ref(f32rng));
215*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> k(nc * kc);
216*4bdc9457SAndroid Build Coastguard Worker   std::generate(k.begin(), k.end(), std::ref(f32rng));
217*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> b(nc);
218*4bdc9457SAndroid Build Coastguard Worker   std::generate(b.begin(), b.end(), std::ref(f32rng));
219*4bdc9457SAndroid Build Coastguard Worker 
220*4bdc9457SAndroid Build Coastguard Worker   std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> t(mc_stride * kc);
221*4bdc9457SAndroid Build Coastguard Worker 
222*4bdc9457SAndroid Build Coastguard Worker   const size_t w_elements = nc_stride * kc + nc_stride;
223*4bdc9457SAndroid Build Coastguard Worker   const size_t c_elements = mc * nc;
224*4bdc9457SAndroid Build Coastguard Worker   const size_t num_buffers = 1 +
225*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
226*4bdc9457SAndroid Build Coastguard Worker       sizeof(float) * (w_elements + c_elements));
227*4bdc9457SAndroid Build Coastguard Worker 
228*4bdc9457SAndroid Build Coastguard Worker   std::vector<float, AlignedAllocator<float, 64>> w(w_elements * num_buffers);
229*4bdc9457SAndroid Build Coastguard Worker   std::fill(w.begin(), w.end(), 0.0f);
230*4bdc9457SAndroid Build Coastguard Worker   xnn_pack_f32_gemm_goi_w(1 /* groups */, nc, kc, nr, 1 /* kr */, 1 /* sr */, k.data(), b.data(), w.data(), 0, nullptr);
231*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> c(c_elements * num_buffers);
232*4bdc9457SAndroid Build Coastguard Worker   std::fill(c.begin(), c.end(), std::nanf(""));
233*4bdc9457SAndroid Build Coastguard Worker 
234*4bdc9457SAndroid Build Coastguard Worker   xnn_f32_minmax_params params;
235*4bdc9457SAndroid Build Coastguard Worker   init_params(&params,
236*4bdc9457SAndroid Build Coastguard Worker     -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
237*4bdc9457SAndroid Build Coastguard Worker 
238*4bdc9457SAndroid Build Coastguard Worker   size_t buffer_index = 0;
239*4bdc9457SAndroid Build Coastguard Worker   for (auto _ : state) {
240*4bdc9457SAndroid Build Coastguard Worker     // Use circular buffers (exceeding cache size) and prefetch to control cache state:
241*4bdc9457SAndroid Build Coastguard Worker     // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
242*4bdc9457SAndroid Build Coastguard Worker     // - W is not in cache (for any cache level)
243*4bdc9457SAndroid Build Coastguard Worker     // - C is not in cache (for any cache level)
244*4bdc9457SAndroid Build Coastguard Worker     state.PauseTiming();
245*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
246*4bdc9457SAndroid Build Coastguard Worker     buffer_index = (buffer_index + 1) % num_buffers;
247*4bdc9457SAndroid Build Coastguard Worker     state.ResumeTiming();
248*4bdc9457SAndroid Build Coastguard Worker 
249*4bdc9457SAndroid Build Coastguard Worker     for (uint32_t m = 0; m < mc; m += mr) {
250*4bdc9457SAndroid Build Coastguard Worker       const uint32_t mb = min(mc - m, mr);
251*4bdc9457SAndroid Build Coastguard Worker       packx(mb, kc, reinterpret_cast<const uint32_t*>(a.data() + m * kc), kc, t.data() + m * kc);
252*4bdc9457SAndroid Build Coastguard Worker     }
253*4bdc9457SAndroid Build Coastguard Worker     for (uint32_t m = 0; m < mc; m += mr) {
254*4bdc9457SAndroid Build Coastguard Worker       const uint32_t mb = min(mc - m, mr);
255*4bdc9457SAndroid Build Coastguard Worker       ppmm(
256*4bdc9457SAndroid Build Coastguard Worker         mb, nc, kc * sizeof(float),
257*4bdc9457SAndroid Build Coastguard Worker         reinterpret_cast<const float*>(t.data() + m * kc),
258*4bdc9457SAndroid Build Coastguard Worker         w.data() + nc_stride * buffer_index * (kc + 1),
259*4bdc9457SAndroid Build Coastguard Worker         c.data() + (mc * buffer_index + m) * nc, nc * sizeof(float), nr * sizeof(float),
260*4bdc9457SAndroid Build Coastguard Worker         &params);
261*4bdc9457SAndroid Build Coastguard Worker     }
262*4bdc9457SAndroid Build Coastguard Worker   }
263*4bdc9457SAndroid Build Coastguard Worker 
264*4bdc9457SAndroid Build Coastguard Worker   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
265*4bdc9457SAndroid Build Coastguard Worker   if (cpu_frequency != 0) {
266*4bdc9457SAndroid Build Coastguard Worker     state.counters["cpufreq"] = cpu_frequency;
267*4bdc9457SAndroid Build Coastguard Worker   }
268*4bdc9457SAndroid Build Coastguard Worker 
269*4bdc9457SAndroid Build Coastguard Worker   state.counters["FLOPS"] = benchmark::Counter(
270*4bdc9457SAndroid Build Coastguard Worker     uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
271*4bdc9457SAndroid Build Coastguard Worker }
272*4bdc9457SAndroid Build Coastguard Worker 
273*4bdc9457SAndroid Build Coastguard Worker #ifdef BENCHMARK_RUY
RuyBenchmark(benchmark::State & state,uint32_t threads)274*4bdc9457SAndroid Build Coastguard Worker static void RuyBenchmark(benchmark::State& state, uint32_t threads)
275*4bdc9457SAndroid Build Coastguard Worker {
276*4bdc9457SAndroid Build Coastguard Worker   std::random_device random_device;
277*4bdc9457SAndroid Build Coastguard Worker   auto rng = std::mt19937(random_device());
278*4bdc9457SAndroid Build Coastguard Worker   auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
279*4bdc9457SAndroid Build Coastguard Worker 
280*4bdc9457SAndroid Build Coastguard Worker   const size_t mc = state.range(0);
281*4bdc9457SAndroid Build Coastguard Worker   const size_t nc = state.range(1);
282*4bdc9457SAndroid Build Coastguard Worker   const size_t kc = state.range(2);
283*4bdc9457SAndroid Build Coastguard Worker 
284*4bdc9457SAndroid Build Coastguard Worker   const size_t num_buffers = 1 +
285*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
286*4bdc9457SAndroid Build Coastguard Worker       sizeof(float) * (nc * (mc + kc + 1)));
287*4bdc9457SAndroid Build Coastguard Worker 
288*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> a(mc * kc + XNN_EXTRA_BYTES / sizeof(float));
289*4bdc9457SAndroid Build Coastguard Worker   std::generate(a.begin(), a.end(), std::ref(f32rng));
290*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> k(num_buffers * nc * kc);
291*4bdc9457SAndroid Build Coastguard Worker   std::generate(k.begin(), k.end(), std::ref(f32rng));
292*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> b(num_buffers * nc);
293*4bdc9457SAndroid Build Coastguard Worker   std::generate(b.begin(), b.end(), std::ref(f32rng));
294*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> c(num_buffers * nc * mc);
295*4bdc9457SAndroid Build Coastguard Worker   std::fill(c.begin(), c.end(), std::nanf(""));
296*4bdc9457SAndroid Build Coastguard Worker 
297*4bdc9457SAndroid Build Coastguard Worker   // Note: context must be static to avoid the cost of re-creating it for each benchmark.
298*4bdc9457SAndroid Build Coastguard Worker   static ruy::Context context;
299*4bdc9457SAndroid Build Coastguard Worker   context.set_max_num_threads(threads);
300*4bdc9457SAndroid Build Coastguard Worker 
301*4bdc9457SAndroid Build Coastguard Worker   ruy::Matrix<float> ruy_a;
302*4bdc9457SAndroid Build Coastguard Worker   ruy::MakeSimpleLayout(nc, kc, ruy::Order::kRowMajor, ruy_a.mutable_layout());
303*4bdc9457SAndroid Build Coastguard Worker   ruy::Matrix<float> ruy_b;
304*4bdc9457SAndroid Build Coastguard Worker   ruy::MakeSimpleLayout(kc, mc, ruy::Order::kColMajor, ruy_b.mutable_layout());
305*4bdc9457SAndroid Build Coastguard Worker   ruy_b.set_data(a.data());
306*4bdc9457SAndroid Build Coastguard Worker   ruy::Matrix<float> ruy_c;
307*4bdc9457SAndroid Build Coastguard Worker   ruy::MakeSimpleLayout(nc, mc, ruy::Order::kColMajor, ruy_c.mutable_layout());
308*4bdc9457SAndroid Build Coastguard Worker 
309*4bdc9457SAndroid Build Coastguard Worker   ruy::MulParams<float, float> mul_params;
310*4bdc9457SAndroid Build Coastguard Worker 
311*4bdc9457SAndroid Build Coastguard Worker   // ruy::Context uses deferred initialization, which affects percieved GEMM performance. Initialization happens during
312*4bdc9457SAndroid Build Coastguard Worker   // the first GEMM calls, and per Benoit Jacob it takes up to ~250 milliseconds for performance to stabilize.
313*4bdc9457SAndroid Build Coastguard Worker   // Thus, on the first benchmark, we compute GEMM for 500 milliseconds (to be safe) without recording performance, and
314*4bdc9457SAndroid Build Coastguard Worker   // keep the ruy::Context object initialized (by being static) between subsequent benchmarks.
315*4bdc9457SAndroid Build Coastguard Worker   static std::once_flag warmup;
316*4bdc9457SAndroid Build Coastguard Worker   std::call_once(warmup, [&](){
317*4bdc9457SAndroid Build Coastguard Worker     auto start = std::chrono::steady_clock::now();
318*4bdc9457SAndroid Build Coastguard Worker     do {
319*4bdc9457SAndroid Build Coastguard Worker       ruy_a.set_data(k.data());
320*4bdc9457SAndroid Build Coastguard Worker       ruy_c.set_data(c.data());
321*4bdc9457SAndroid Build Coastguard Worker       mul_params.set_bias(b.data());
322*4bdc9457SAndroid Build Coastguard Worker 
323*4bdc9457SAndroid Build Coastguard Worker       ruy::Mul(ruy_a, ruy_b, mul_params, &context, &ruy_c);
324*4bdc9457SAndroid Build Coastguard Worker     } while (std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count() < 0.5);
325*4bdc9457SAndroid Build Coastguard Worker   });
326*4bdc9457SAndroid Build Coastguard Worker 
327*4bdc9457SAndroid Build Coastguard Worker   size_t buffer_index = 0;
328*4bdc9457SAndroid Build Coastguard Worker   for (auto _ : state) {
329*4bdc9457SAndroid Build Coastguard Worker     // Use circular buffers (exceeding cache size) and prefetch to control cache state:
330*4bdc9457SAndroid Build Coastguard Worker     // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
331*4bdc9457SAndroid Build Coastguard Worker     // - K is not in cache (for any cache level)
332*4bdc9457SAndroid Build Coastguard Worker     // - B is not in cache (for any cache level)
333*4bdc9457SAndroid Build Coastguard Worker     // - C is not in cache (for any cache level)
334*4bdc9457SAndroid Build Coastguard Worker     state.PauseTiming();
335*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
336*4bdc9457SAndroid Build Coastguard Worker     buffer_index = (buffer_index + 1) % num_buffers;
337*4bdc9457SAndroid Build Coastguard Worker     state.ResumeTiming();
338*4bdc9457SAndroid Build Coastguard Worker 
339*4bdc9457SAndroid Build Coastguard Worker     ruy_a.set_data(k.data() + buffer_index * nc * kc);
340*4bdc9457SAndroid Build Coastguard Worker     ruy_c.set_data(c.data() + buffer_index * mc * nc);
341*4bdc9457SAndroid Build Coastguard Worker     mul_params.set_bias(b.data() + buffer_index * nc);
342*4bdc9457SAndroid Build Coastguard Worker 
343*4bdc9457SAndroid Build Coastguard Worker     ruy::Mul(ruy_a, ruy_b, mul_params, &context, &ruy_c);
344*4bdc9457SAndroid Build Coastguard Worker   }
345*4bdc9457SAndroid Build Coastguard Worker 
346*4bdc9457SAndroid Build Coastguard Worker   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
347*4bdc9457SAndroid Build Coastguard Worker   if (cpu_frequency != 0) {
348*4bdc9457SAndroid Build Coastguard Worker     state.counters["cpufreq"] = cpu_frequency;
349*4bdc9457SAndroid Build Coastguard Worker   }
350*4bdc9457SAndroid Build Coastguard Worker 
351*4bdc9457SAndroid Build Coastguard Worker   state.counters["FLOPS"] = benchmark::Counter(
352*4bdc9457SAndroid Build Coastguard Worker     uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
353*4bdc9457SAndroid Build Coastguard Worker }
354*4bdc9457SAndroid Build Coastguard Worker 
ruy_st(benchmark::State & state,const char * net)355*4bdc9457SAndroid Build Coastguard Worker static void ruy_st(benchmark::State& state, const char* net)
356*4bdc9457SAndroid Build Coastguard Worker {
357*4bdc9457SAndroid Build Coastguard Worker   RuyBenchmark(state, 1);
358*4bdc9457SAndroid Build Coastguard Worker }
359*4bdc9457SAndroid Build Coastguard Worker #endif  // BENCHMARK_RUY
360*4bdc9457SAndroid Build Coastguard Worker 
361*4bdc9457SAndroid Build Coastguard Worker #if XNN_PLATFORM_JIT
GEMMBenchmark(benchmark::State & state,xnn_jit_gemm_code_generator_function generator,size_t mr,size_t nr,size_t kr,size_t sr,xnn_init_f32_minmax_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)362*4bdc9457SAndroid Build Coastguard Worker static void GEMMBenchmark(benchmark::State& state,
363*4bdc9457SAndroid Build Coastguard Worker   xnn_jit_gemm_code_generator_function generator,
364*4bdc9457SAndroid Build Coastguard Worker   size_t mr, size_t nr, size_t kr, size_t sr,
365*4bdc9457SAndroid Build Coastguard Worker   xnn_init_f32_minmax_params_fn init_params,
366*4bdc9457SAndroid Build Coastguard Worker   benchmark::utils::IsaCheckFunction isa_check = nullptr)
367*4bdc9457SAndroid Build Coastguard Worker {
368*4bdc9457SAndroid Build Coastguard Worker   if (isa_check && !isa_check(state)) {
369*4bdc9457SAndroid Build Coastguard Worker     return;
370*4bdc9457SAndroid Build Coastguard Worker   }
371*4bdc9457SAndroid Build Coastguard Worker 
372*4bdc9457SAndroid Build Coastguard Worker   const size_t mc = state.range(0);
373*4bdc9457SAndroid Build Coastguard Worker   const size_t nc = state.range(1);
374*4bdc9457SAndroid Build Coastguard Worker   const size_t kc = state.range(2);
375*4bdc9457SAndroid Build Coastguard Worker 
376*4bdc9457SAndroid Build Coastguard Worker   const size_t nc_stride = benchmark::utils::RoundUp(nc, nr);
377*4bdc9457SAndroid Build Coastguard Worker   const size_t kc_stride = benchmark::utils::RoundUp(kc, kr);
378*4bdc9457SAndroid Build Coastguard Worker 
379*4bdc9457SAndroid Build Coastguard Worker   std::random_device random_device;
380*4bdc9457SAndroid Build Coastguard Worker   auto rng = std::mt19937(random_device());
381*4bdc9457SAndroid Build Coastguard Worker   auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
382*4bdc9457SAndroid Build Coastguard Worker 
383*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> a(mc * kc + XNN_EXTRA_BYTES / sizeof(float));
384*4bdc9457SAndroid Build Coastguard Worker   std::generate(a.begin(), a.end(), std::ref(f32rng));
385*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> k(nc * kc);
386*4bdc9457SAndroid Build Coastguard Worker   std::generate(k.begin(), k.end(), std::ref(f32rng));
387*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> b(nc);
388*4bdc9457SAndroid Build Coastguard Worker   std::generate(b.begin(), b.end(), std::ref(f32rng));
389*4bdc9457SAndroid Build Coastguard Worker 
390*4bdc9457SAndroid Build Coastguard Worker   const size_t w_elements = nc_stride * kc_stride + nc_stride;
391*4bdc9457SAndroid Build Coastguard Worker   const size_t c_elements = mc * nc;
392*4bdc9457SAndroid Build Coastguard Worker   const size_t num_buffers = 1 +
393*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
394*4bdc9457SAndroid Build Coastguard Worker       sizeof(float) * (w_elements + c_elements));
395*4bdc9457SAndroid Build Coastguard Worker 
396*4bdc9457SAndroid Build Coastguard Worker   std::vector<float, AlignedAllocator<float, 64>> w(w_elements * num_buffers);
397*4bdc9457SAndroid Build Coastguard Worker   std::fill(w.begin(), w.end(), 0.0f);
398*4bdc9457SAndroid Build Coastguard Worker   xnn_pack_f32_gemm_goi_w(1 /* groups */, nc, kc, nr, kr, sr, k.data(), b.data(), w.data(), 0, nullptr);
399*4bdc9457SAndroid Build Coastguard Worker   std::vector<float> c(c_elements * num_buffers);
400*4bdc9457SAndroid Build Coastguard Worker   std::fill(c.begin(), c.end(), std::nanf(""));
401*4bdc9457SAndroid Build Coastguard Worker 
402*4bdc9457SAndroid Build Coastguard Worker   xnn_f32_minmax_params params;
403*4bdc9457SAndroid Build Coastguard Worker   init_params(&params,
404*4bdc9457SAndroid Build Coastguard Worker     -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
405*4bdc9457SAndroid Build Coastguard Worker 
406*4bdc9457SAndroid Build Coastguard Worker   xnn_initialize(/*allocator=*/nullptr);
407*4bdc9457SAndroid Build Coastguard Worker   xnn_code_buffer code_buffer;
408*4bdc9457SAndroid Build Coastguard Worker   xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE);
409*4bdc9457SAndroid Build Coastguard Worker   jit_gemm_params jit_params = {
410*4bdc9457SAndroid Build Coastguard Worker     .f32_minmax = {
411*4bdc9457SAndroid Build Coastguard Worker       .min = -std::numeric_limits<float>::infinity(),
412*4bdc9457SAndroid Build Coastguard Worker       .max = +std::numeric_limits<float>::infinity()
413*4bdc9457SAndroid Build Coastguard Worker     }
414*4bdc9457SAndroid Build Coastguard Worker   };
415*4bdc9457SAndroid Build Coastguard Worker   generator(&code_buffer, mr, nc % nr, kc * sizeof(float), &jit_params);
416*4bdc9457SAndroid Build Coastguard Worker   xnn_finalize_code_memory(&code_buffer);
417*4bdc9457SAndroid Build Coastguard Worker   xnn_f32_gemm_minmax_ukernel_function gemm = reinterpret_cast<xnn_f32_gemm_minmax_ukernel_function>(code_buffer.start);
418*4bdc9457SAndroid Build Coastguard Worker 
419*4bdc9457SAndroid Build Coastguard Worker   size_t buffer_index = 0;
420*4bdc9457SAndroid Build Coastguard Worker   for (auto _ : state) {
421*4bdc9457SAndroid Build Coastguard Worker     // Use circular buffers (exceeding cache size) and prefetch to control cache state:
422*4bdc9457SAndroid Build Coastguard Worker     // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
423*4bdc9457SAndroid Build Coastguard Worker     // - W is not in cache (for any cache level)
424*4bdc9457SAndroid Build Coastguard Worker     // - C is not in cache (for any cache level)
425*4bdc9457SAndroid Build Coastguard Worker     state.PauseTiming();
426*4bdc9457SAndroid Build Coastguard Worker     benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
427*4bdc9457SAndroid Build Coastguard Worker     buffer_index = (buffer_index + 1) % num_buffers;
428*4bdc9457SAndroid Build Coastguard Worker     state.ResumeTiming();
429*4bdc9457SAndroid Build Coastguard Worker 
430*4bdc9457SAndroid Build Coastguard Worker     for (uint32_t m = 0; m < mc; m += mr) {
431*4bdc9457SAndroid Build Coastguard Worker       const uint32_t mb = min(mc - m, mr);
432*4bdc9457SAndroid Build Coastguard Worker       gemm(
433*4bdc9457SAndroid Build Coastguard Worker         mb, nc, kc * sizeof(float),
434*4bdc9457SAndroid Build Coastguard Worker         a.data() + m * kc, kc * sizeof(float),
435*4bdc9457SAndroid Build Coastguard Worker         w.data() + buffer_index * nc_stride * (kc_stride + 1),
436*4bdc9457SAndroid Build Coastguard Worker         c.data() + (buffer_index * mc + m) * nc, nc * sizeof(float), nr * sizeof(float),
437*4bdc9457SAndroid Build Coastguard Worker         &params);
438*4bdc9457SAndroid Build Coastguard Worker     }
439*4bdc9457SAndroid Build Coastguard Worker   }
440*4bdc9457SAndroid Build Coastguard Worker 
441*4bdc9457SAndroid Build Coastguard Worker   xnn_release_code_memory(&code_buffer);
442*4bdc9457SAndroid Build Coastguard Worker 
443*4bdc9457SAndroid Build Coastguard Worker   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
444*4bdc9457SAndroid Build Coastguard Worker   if (cpu_frequency != 0) {
445*4bdc9457SAndroid Build Coastguard Worker     state.counters["cpufreq"] = cpu_frequency;
446*4bdc9457SAndroid Build Coastguard Worker   }
447*4bdc9457SAndroid Build Coastguard Worker 
448*4bdc9457SAndroid Build Coastguard Worker   state.counters["FLOPS"] = benchmark::Counter(
449*4bdc9457SAndroid Build Coastguard Worker     uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
450*4bdc9457SAndroid Build Coastguard Worker }
451*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_PLATFORM_JIT
452*4bdc9457SAndroid Build Coastguard Worker 
453*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
f32_gemm_1x8__aarch64_neonfma_ld64(benchmark::State & state,const char * net)454*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
455*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, 1, 8, 1, 1,
456*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
457*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x12__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)458*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
459*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, 1, 12, 1, 1,
460*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
461*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x8__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)462*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
463*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, 1, 8, 1, 1,
464*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
465*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State & state,const char * net)466*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State& state, const char* net) {
467*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53, 1, 8, 1, 1,
468*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
469*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)470*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
471*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, 1, 8, 1, 1,
472*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
473*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)474*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
475*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, 1, 8, 1, 1,
476*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
477*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)478*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
479*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, 4, 12, 1, 1,
480*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
481*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)482*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
483*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, 4, 8, 1, 1,
484*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
485*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State & state,const char * net)486*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State& state, const char* net) {
487*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53, 4, 8, 1, 1,
488*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
489*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch64_neonfma_cortex_a55(benchmark::State & state,const char * net)490*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) {
491*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, 4, 8, 1, 1,
492*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
493*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)494*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
495*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, 4, 8, 1, 1,
496*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
497*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)498*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
499*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, 4, 8, 1, 1,
500*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
501*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x2__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)502*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x2__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
503*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, 4, 2, 1, 1,
504*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
505*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x2__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)506*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x2__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
507*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75, 4, 2, 1, 1,
508*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
509*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x2__aarch64_neonfma_ld64(benchmark::State & state,const char * net)510*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x2__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
511*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_ld64, 4, 2, 1, 1,
512*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
513*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch64_neonfma_ld64(benchmark::State & state,const char * net)514*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
515*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, 4, 8, 1, 1,
516*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
517*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch64_neonfma_ld128(benchmark::State & state,const char * net)518*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) {
519*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, 4, 8, 1, 1,
520*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
521*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)522*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
523*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, 5, 8, 1, 1,
524*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
525*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)526*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
527*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, 5, 8, 1, 1,
528*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
529*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__aarch64_neonfma_ld64(benchmark::State & state,const char * net)530*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
531*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, 6, 8, 1, 1,
532*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
533*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__aarch64_neonfma_ld128(benchmark::State & state,const char * net)534*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) {
535*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, 6, 8, 1, 1,
536*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
537*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)538*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
539*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, 6, 8, 1, 1,
540*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
541*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State & state,const char * net)542*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State& state, const char* net) {
543*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, 6, 8, 1, 1,
544*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
545*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__aarch64_neonfma_cortex_a55(benchmark::State & state,const char * net)546*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) {
547*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, 6, 8, 1, 1,
548*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
549*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State & state,const char * net)550*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State& state, const char* net) {
551*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, 6, 8, 1, 1,
552*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
553*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)554*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
555*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, 6, 8, 1, 1,
556*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
557*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)558*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
559*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, 6, 8, 1, 1,
560*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
561*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x8__neonfma_lane_ld64(benchmark::State & state,const char * net)562*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
563*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64, 1, 8, 1, 1,
564*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
565*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x2__neonfma_lane_ld64(benchmark::State & state,const char * net)566*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x2__neonfma_lane_ld64(benchmark::State& state, const char* net) {
567*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64, 4, 2, 1, 1,
568*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
569*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x2__neonfma_lane_ld64(benchmark::State & state,const char * net)570*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x2__neonfma_lane_ld64(benchmark::State& state, const char* net) {
571*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x2__neonfma_lane_ld64, 6, 2, 1, 1,
572*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
573*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__neonfma_lane_ld64(benchmark::State & state,const char * net)574*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
575*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64, 4, 8, 1, 1,
576*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
577*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__neonfma_lane_ld128(benchmark::State & state,const char * net)578*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__neonfma_lane_ld128(benchmark::State& state, const char* net) {
579*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, 4, 8, 1, 1,
580*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
581*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__neonfma_lane_ld64(benchmark::State & state,const char * net)582*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
583*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64, 5, 8, 1, 1,
584*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
585*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__neonfma_lane_ld64(benchmark::State & state,const char * net)586*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
587*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64, 6, 8, 1, 1,
588*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
589*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__neonfma_lane_ld128(benchmark::State & state,const char * net)590*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__neonfma_lane_ld128(benchmark::State& state, const char* net) {
591*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128, 6, 8, 1, 1,
592*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params);
593*4bdc9457SAndroid Build Coastguard Worker   }
594*4bdc9457SAndroid Build Coastguard Worker 
595*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__aarch64_neonfma_ld64)
BENCHMARK_GEMM(f32_gemm_1x12__aarch64_neonfma_cortex_a53)596*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x12__aarch64_neonfma_cortex_a53)
597*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__aarch64_neonfma_cortex_a53)
598*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a53)
599*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__aarch64_neonfma_cortex_a75)
600*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a75)
601*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x2__aarch64_neonfma_cortex_a75)
602*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x2__aarch64_neonfma_prfm_cortex_a75)
603*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x2__aarch64_neonfma_ld64)
604*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_cortex_a53)
605*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a53)
606*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_cortex_a55)
607*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_cortex_a75)
608*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a75)
609*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_ld128)
610*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_ld64)
611*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x12__aarch64_neonfma_cortex_a53)
612*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__aarch64_neonfma_cortex_a75)
613*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__aarch64_neonfma_prfm_cortex_a75)
614*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a53)
615*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_prfm_cortex_a53)
616*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a55)
617*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a73)
618*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a75)
619*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_prfm_cortex_a75)
620*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_ld64)
621*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_ld128)
622*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__neonfma_lane_ld64)
623*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x2__neonfma_lane_ld64)
624*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x2__neonfma_lane_ld64)
625*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__neonfma_lane_ld64)
626*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__neonfma_lane_ld128)
627*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__neonfma_lane_ld64)
628*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__neonfma_lane_ld64)
629*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__neonfma_lane_ld128)
630*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM64
631*4bdc9457SAndroid Build Coastguard Worker 
632*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
633*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x4__aarch32_vfp_ld64(benchmark::State& state, const char* net) {
634*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64, 4, 4, 1, 1,
635*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckVFP);
636*4bdc9457SAndroid Build Coastguard Worker   }
637*4bdc9457SAndroid Build Coastguard Worker 
f32_gemm_4x8__aarch32_neon_ld64(benchmark::State & state,const char * net)638*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch32_neon_ld64(benchmark::State& state, const char* net) {
639*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64, 4, 8, 1, 1,
640*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
641*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch32_neon_cortex_a7(benchmark::State & state,const char * net)642*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch32_neon_cortex_a7(benchmark::State& state, const char* net) {
643*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, 4, 8, 1, 1,
644*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
645*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch32_neon_cortex_a53(benchmark::State & state,const char * net)646*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch32_neon_cortex_a53(benchmark::State& state, const char* net) {
647*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, 4, 8, 1, 1,
648*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
649*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch32_neon_prfm_cortex_a53(benchmark::State & state,const char * net)650*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch32_neon_prfm_cortex_a53(benchmark::State& state, const char* net) {
651*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, 4, 8, 1, 1,
652*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
653*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch32_neon_cortex_a55(benchmark::State & state,const char * net)654*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch32_neon_cortex_a55(benchmark::State& state, const char* net) {
655*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, 4, 8, 1, 1,
656*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
657*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch32_neon_cortex_a75(benchmark::State & state,const char * net)658*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch32_neon_cortex_a75(benchmark::State& state, const char* net) {
659*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, 4, 8, 1, 1,
660*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
661*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__aarch32_neon_prfm_cortex_a75(benchmark::State & state,const char * net)662*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__aarch32_neon_prfm_cortex_a75(benchmark::State& state, const char* net) {
663*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, 4, 8, 1, 1,
664*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
665*4bdc9457SAndroid Build Coastguard Worker   }
666*4bdc9457SAndroid Build Coastguard Worker 
667*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x4__aarch32_vfp_ld64)
BENCHMARK_GEMM(f32_gemm_4x8__aarch32_neon_ld64)668*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch32_neon_ld64)
669*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch32_neon_cortex_a7)
670*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch32_neon_cortex_a53)
671*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch32_neon_prfm_cortex_a53)
672*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch32_neon_cortex_a55)
673*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch32_neon_cortex_a75)
674*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__aarch32_neon_prfm_cortex_a75)
675*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM
676*4bdc9457SAndroid Build Coastguard Worker 
677*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
678*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__neon_lane_ld64(benchmark::State& state, const char* net) {
679*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, 1, 8, 1, 1,
680*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
681*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x2__neon_lane_ld64(benchmark::State & state,const char * net)682*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x2__neon_lane_ld64(benchmark::State& state, const char* net) {
683*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64, 4, 2, 1, 1,
684*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
685*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x2__neon_lane_ld64(benchmark::State & state,const char * net)686*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x2__neon_lane_ld64(benchmark::State& state, const char* net) {
687*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x2__neon_lane_ld64, 6, 2, 1, 1,
688*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
689*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__neon_lane_ld64(benchmark::State & state,const char * net)690*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__neon_lane_ld64(benchmark::State& state, const char* net) {
691*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, 4, 8, 1, 1,
692*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
693*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__neon_lane_ld128(benchmark::State & state,const char * net)694*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__neon_lane_ld128(benchmark::State& state, const char* net) {
695*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, 4, 8, 1, 1,
696*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
697*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__neon_lane_ld64(benchmark::State & state,const char * net)698*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__neon_lane_ld64(benchmark::State& state, const char* net) {
699*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, 5, 8, 1, 1,
700*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
701*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__neon_lane_ld64(benchmark::State & state,const char * net)702*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__neon_lane_ld64(benchmark::State& state, const char* net) {
703*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64, 6, 8, 1, 1,
704*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
705*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__neon_lane_ld128(benchmark::State & state,const char * net)706*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__neon_lane_ld128(benchmark::State& state, const char* net) {
707*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, 6, 8, 1, 1,
708*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
709*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x8__neonfma_dup_ld64(benchmark::State & state,const char * net)710*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
711*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64, 1, 8, 1, 1,
712*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
713*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__neonfma_dup_ld64(benchmark::State & state,const char * net)714*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
715*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, 4, 8, 1, 1,
716*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
717*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__neonfma_dup_ld128(benchmark::State & state,const char * net)718*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__neonfma_dup_ld128(benchmark::State& state, const char* net) {
719*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128, 4, 8, 1, 1,
720*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
721*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__neonfma_dup_ld64(benchmark::State & state,const char * net)722*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
723*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64, 6, 8, 1, 1,
724*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
725*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__neonfma_dup_ld128(benchmark::State & state,const char * net)726*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__neonfma_dup_ld128(benchmark::State& state, const char* net) {
727*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128, 6, 8, 1, 1,
728*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
729*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x8s4__neon(benchmark::State & state,const char * net)730*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8s4__neon(benchmark::State& state, const char* net) {
731*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8s4__neon, 1, 8, 1, 4,
732*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
733*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x8s4__neonfma(benchmark::State & state,const char * net)734*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8s4__neonfma(benchmark::State& state, const char* net) {
735*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, 1, 8, 1, 4,
736*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
737*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8s4__neon(benchmark::State & state,const char * net)738*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8s4__neon(benchmark::State& state, const char* net) {
739*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__neon, 4, 8, 1, 4,
740*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
741*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8s4__neonfma(benchmark::State & state,const char * net)742*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8s4__neonfma(benchmark::State& state, const char* net) {
743*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma, 4, 8, 1, 4,
744*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
745*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8s4__neon(benchmark::State & state,const char * net)746*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8s4__neon(benchmark::State& state, const char* net) {
747*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__neon, 6, 8, 1, 4,
748*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
749*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8s4__neonfma(benchmark::State & state,const char * net)750*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8s4__neonfma(benchmark::State& state, const char* net) {
751*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, 6, 8, 1, 4,
752*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
753*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_8x8s4__neon(benchmark::State & state,const char * net)754*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_8x8s4__neon(benchmark::State& state, const char* net) {
755*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8s4__neon, 8, 8, 1, 4,
756*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
757*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_8x8s4__neonfma(benchmark::State & state,const char * net)758*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_8x8s4__neonfma(benchmark::State& state, const char* net) {
759*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, 8, 8, 1, 4,
760*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
761*4bdc9457SAndroid Build Coastguard Worker   }
f32_ppmm_4x8_unipass__neonfma(benchmark::State & state,const char * net)762*4bdc9457SAndroid Build Coastguard Worker   static void f32_ppmm_4x8_unipass__neonfma(benchmark::State& state, const char* net) {
763*4bdc9457SAndroid Build Coastguard Worker     PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_x32_packx_ukernel_4x__neon_st4, 4, 8,
764*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
765*4bdc9457SAndroid Build Coastguard Worker   }
f32_ppmm_4x8_twopass__neonfma(benchmark::State & state,const char * net)766*4bdc9457SAndroid Build Coastguard Worker   static void f32_ppmm_4x8_twopass__neonfma(benchmark::State& state, const char* net) {
767*4bdc9457SAndroid Build Coastguard Worker     PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_x32_packx_ukernel_4x__neon_st4, 4, 8,
768*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
769*4bdc9457SAndroid Build Coastguard Worker   }
770*4bdc9457SAndroid Build Coastguard Worker 
771*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__neon_lane_ld64)
BENCHMARK_GEMM(f32_gemm_4x2__neon_lane_ld64)772*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x2__neon_lane_ld64)
773*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x2__neon_lane_ld64)
774*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__neon_lane_ld64)
775*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__neon_lane_ld128)
776*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__neon_lane_ld64)
777*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__neon_lane_ld64)
778*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__neon_lane_ld128)
779*4bdc9457SAndroid Build Coastguard Worker 
780*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__neonfma_dup_ld64)
781*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__neonfma_dup_ld64)
782*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__neonfma_dup_ld128)
783*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__neonfma_dup_ld64)
784*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__neonfma_dup_ld128)
785*4bdc9457SAndroid Build Coastguard Worker 
786*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8s4__neon)
787*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8s4__neon)
788*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8s4__neon)
789*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_8x8s4__neon)
790*4bdc9457SAndroid Build Coastguard Worker 
791*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8s4__neonfma)
792*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8s4__neonfma)
793*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8s4__neonfma)
794*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_8x8s4__neonfma)
795*4bdc9457SAndroid Build Coastguard Worker 
796*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_ppmm_4x8_unipass__neonfma)
797*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_ppmm_4x8_twopass__neonfma)
798*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
799*4bdc9457SAndroid Build Coastguard Worker 
800*4bdc9457SAndroid Build Coastguard Worker 
801*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM && XNN_PLATFORM_JIT
802*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_4x8__aarch32_neon_ld64(benchmark::State& state, const char* net)
803*4bdc9457SAndroid Build Coastguard Worker   {
804*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_ld64, 4, 8, 1, 1,
805*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
806*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_4x8__aarch32_neon_cortex_a7(benchmark::State & state,const char * net)807*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_4x8__aarch32_neon_cortex_a7(benchmark::State& state, const char* net)
808*4bdc9457SAndroid Build Coastguard Worker   {
809*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, 4, 8, 1, 1,
810*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
811*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_4x8__aarch32_neon_cortex_a53(benchmark::State & state,const char * net)812*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_4x8__aarch32_neon_cortex_a53(benchmark::State& state, const char* net)
813*4bdc9457SAndroid Build Coastguard Worker   {
814*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a53, 4, 8, 1, 1,
815*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
816*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_4x8__aarch32_neon_cortex_a55(benchmark::State & state,const char * net)817*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_4x8__aarch32_neon_cortex_a55(benchmark::State& state, const char* net)
818*4bdc9457SAndroid Build Coastguard Worker   {
819*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, 4, 8, 1, 1,
820*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
821*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_4x8__aarch32_neon_cortex_a75(benchmark::State & state,const char * net)822*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_4x8__aarch32_neon_cortex_a75(benchmark::State& state, const char* net)
823*4bdc9457SAndroid Build Coastguard Worker   {
824*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, 4, 8, 1, 1,
825*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
826*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_4x8__aarch32_neon_prfm_cortex_a75(benchmark::State & state,const char * net)827*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_4x8__aarch32_neon_prfm_cortex_a75(benchmark::State& state, const char* net)
828*4bdc9457SAndroid Build Coastguard Worker   {
829*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, 4, 8, 1, 1,
830*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
831*4bdc9457SAndroid Build Coastguard Worker   }
832*4bdc9457SAndroid Build Coastguard Worker 
833*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch32_neon_cortex_a53)
BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch32_neon_cortex_a55)834*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch32_neon_cortex_a55)
835*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch32_neon_cortex_a75)
836*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch32_neon_prfm_cortex_a75)
837*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch32_neon_ld64)
838*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch32_neon_cortex_a7)
839*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM && XNN_PLATFORM_JIT
840*4bdc9457SAndroid Build Coastguard Worker 
841*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
842*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net)
843*4bdc9457SAndroid Build Coastguard Worker   {
844*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75, 1, 8, 1, 1,
845*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
846*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)847*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net)
848*4bdc9457SAndroid Build Coastguard Worker   {
849*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, 1, 8, 1, 1,
850*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
851*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)852*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net)
853*4bdc9457SAndroid Build Coastguard Worker   {
854*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75, 4, 8, 1, 1,
855*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
856*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)857*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net)
858*4bdc9457SAndroid Build Coastguard Worker   {
859*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, 4, 8, 1, 1,
860*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
861*4bdc9457SAndroid Build Coastguard Worker   }
jit_f32_gemm_6x8__aarch64_neonfma_ld128(benchmark::State & state,const char * net)862*4bdc9457SAndroid Build Coastguard Worker   static void jit_f32_gemm_6x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net)
863*4bdc9457SAndroid Build Coastguard Worker   {
864*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_ld128, 6, 8, 1, 1,
865*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
866*4bdc9457SAndroid Build Coastguard Worker   }
867*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_1x8__aarch64_neonfma_cortex_a75)
868*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_1x8__aarch64_neonfma_prfm_cortex_a75)
869*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch64_neonfma_cortex_a75)
870*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_4x8__aarch64_neonfma_prfm_cortex_a75)
871*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(jit_f32_gemm_6x8__aarch64_neonfma_ld128)
872*4bdc9457SAndroid Build Coastguard Worker 
873*4bdc9457SAndroid Build Coastguard Worker #define BENCHMARK_UPTO_MR_GEMM(name, max_mr, nr)                                \
874*4bdc9457SAndroid Build Coastguard Worker   static void name(benchmark::State &state, const char *net) {                  \
875*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(                                                              \
876*4bdc9457SAndroid Build Coastguard Worker         state,                                                                  \
877*4bdc9457SAndroid Build Coastguard Worker         xnn_generate_f32_gemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, \
878*4bdc9457SAndroid Build Coastguard Worker         max_mr, nr, 1, 1, xnn_init_f32_minmax_scalar_params,                    \
879*4bdc9457SAndroid Build Coastguard Worker         benchmark::utils::CheckNEON);                                           \
880*4bdc9457SAndroid Build Coastguard Worker   }                                                                             \
881*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(name)
882*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_UPTO_MR_GEMM(jit_f32_gemm_upto6x8_1x8__aarch64_neonfma_prfm_cortex_a75, 1, 8);
883*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_UPTO_MR_GEMM(jit_f32_gemm_upto6x8_2x8__aarch64_neonfma_prfm_cortex_a75, 2, 8);
884*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_UPTO_MR_GEMM(jit_f32_gemm_upto6x8_3x8__aarch64_neonfma_prfm_cortex_a75, 3, 8);
885*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_UPTO_MR_GEMM(jit_f32_gemm_upto6x8_4x8__aarch64_neonfma_prfm_cortex_a75, 4, 8);
886*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_UPTO_MR_GEMM(jit_f32_gemm_upto6x8_5x8__aarch64_neonfma_prfm_cortex_a75, 5, 8);
887*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_UPTO_MR_GEMM(jit_f32_gemm_upto6x8_6x8__aarch64_neonfma_prfm_cortex_a75, 6, 8);
888*4bdc9457SAndroid Build Coastguard Worker #undef BENCHMARK_UPTO_MR_GEMM
889*4bdc9457SAndroid Build Coastguard Worker 
890*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
891*4bdc9457SAndroid Build Coastguard Worker 
892*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
f32_gemm_1x16__avx512f_broadcast(benchmark::State & state,const char * net)893*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x16__avx512f_broadcast(benchmark::State& state, const char* net) {
894*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, 1, 16, 1, 1,
895*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckAVX512F);
896*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x16__avx512f_broadcast(benchmark::State & state,const char * net)897*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x16__avx512f_broadcast(benchmark::State& state, const char* net) {
898*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast, 4, 16, 1, 1,
899*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckAVX512F);
900*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x16__avx512f_broadcast(benchmark::State & state,const char * net)901*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x16__avx512f_broadcast(benchmark::State& state, const char* net) {
902*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast, 5, 16, 1, 1,
903*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckAVX512F);
904*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x16__avx512f_broadcast(benchmark::State & state,const char * net)905*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x16__avx512f_broadcast(benchmark::State& state, const char* net) {
906*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, 6, 16, 1, 1,
907*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckAVX512F);
908*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_7x16__avx512f_broadcast(benchmark::State & state,const char * net)909*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_7x16__avx512f_broadcast(benchmark::State& state, const char* net) {
910*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, 7, 16, 1, 1,
911*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckAVX512F);
912*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_8x16__avx512f_broadcast(benchmark::State & state,const char * net)913*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_8x16__avx512f_broadcast(benchmark::State& state, const char* net) {
914*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, 8, 16, 1, 1,
915*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckAVX512F);
916*4bdc9457SAndroid Build Coastguard Worker   }
917*4bdc9457SAndroid Build Coastguard Worker 
f32_gemm_1x8__fma3_broadcast(benchmark::State & state,const char * net)918*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__fma3_broadcast(benchmark::State& state, const char* net) {
919*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, 1, 8, 1, 1,
920*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
921*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__fma3_broadcast(benchmark::State & state,const char * net)922*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__fma3_broadcast(benchmark::State& state, const char* net) {
923*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, 4, 8, 1, 1,
924*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
925*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__fma3_broadcast(benchmark::State & state,const char * net)926*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__fma3_broadcast(benchmark::State& state, const char* net) {
927*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, 5, 8, 1, 1,
928*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
929*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__fma3_broadcast(benchmark::State & state,const char * net)930*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__fma3_broadcast(benchmark::State& state, const char* net) {
931*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast, 6, 8, 1, 1,
932*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
933*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_7x8__fma3_broadcast(benchmark::State & state,const char * net)934*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_7x8__fma3_broadcast(benchmark::State& state, const char* net) {
935*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast, 7, 8, 1, 1,
936*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
937*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_8x8__fma3_broadcast(benchmark::State & state,const char * net)938*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_8x8__fma3_broadcast(benchmark::State& state, const char* net) {
939*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast, 8, 8, 1, 1,
940*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
941*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x16__fma3_broadcast(benchmark::State & state,const char * net)942*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x16__fma3_broadcast(benchmark::State& state, const char* net) {
943*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, 1, 16, 1, 1,
944*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
945*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x16__fma3_broadcast(benchmark::State & state,const char * net)946*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x16__fma3_broadcast(benchmark::State& state, const char* net) {
947*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, 4, 16, 1, 1,
948*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
949*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x16__fma3_broadcast(benchmark::State & state,const char * net)950*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x16__fma3_broadcast(benchmark::State& state, const char* net) {
951*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast, 4, 16, 1, 1,
952*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
953*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x16__fma3_broadcast(benchmark::State & state,const char * net)954*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x16__fma3_broadcast(benchmark::State& state, const char* net) {
955*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast, 5, 16, 1, 1,
956*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
957*4bdc9457SAndroid Build Coastguard Worker   }
958*4bdc9457SAndroid Build Coastguard Worker 
f32_gemm_1x16s4__fma3_broadcast(benchmark::State & state,const char * net)959*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x16s4__fma3_broadcast(benchmark::State& state, const char* net) {
960*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast, 1, 16, 1, 4,
961*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
962*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x16s4__fma3_broadcast(benchmark::State & state,const char * net)963*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x16s4__fma3_broadcast(benchmark::State& state, const char* net) {
964*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast, 4, 16, 1, 4,
965*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
966*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x16s4__fma3_broadcast(benchmark::State & state,const char * net)967*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x16s4__fma3_broadcast(benchmark::State& state, const char* net) {
968*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast, 4, 16, 1, 4,
969*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
970*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x16s4__fma3_broadcast(benchmark::State & state,const char * net)971*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x16s4__fma3_broadcast(benchmark::State& state, const char* net) {
972*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast, 5, 16, 1, 4,
973*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
974*4bdc9457SAndroid Build Coastguard Worker   }
975*4bdc9457SAndroid Build Coastguard Worker 
f32_gemm_1x8__avx_broadcast(benchmark::State & state,const char * net)976*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__avx_broadcast(benchmark::State& state, const char* net) {
977*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, 1, 8, 1, 1,
978*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
979*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__avx_broadcast(benchmark::State & state,const char * net)980*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__avx_broadcast(benchmark::State& state, const char* net) {
981*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast, 4, 8, 1, 1,
982*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
983*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__avx_broadcast(benchmark::State & state,const char * net)984*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__avx_broadcast(benchmark::State& state, const char* net) {
985*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, 5, 8, 1, 1,
986*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
987*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__avx_broadcast(benchmark::State & state,const char * net)988*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__avx_broadcast(benchmark::State& state, const char* net) {
989*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, 6, 8, 1, 1,
990*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
991*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_7x8__avx_broadcast(benchmark::State & state,const char * net)992*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_7x8__avx_broadcast(benchmark::State& state, const char* net) {
993*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast, 7, 8, 1, 1,
994*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
995*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_1x16__avx_broadcast(benchmark::State & state,const char * net)996*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x16__avx_broadcast(benchmark::State& state, const char* net) {
997*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast, 1, 16, 1, 1,
998*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
999*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x16__avx_broadcast(benchmark::State & state,const char * net)1000*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x16__avx_broadcast(benchmark::State& state, const char* net) {
1001*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, 4, 16, 1, 1,
1002*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
1003*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x16__avx_broadcast(benchmark::State & state,const char * net)1004*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x16__avx_broadcast(benchmark::State& state, const char* net) {
1005*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast, 4, 16, 1, 1,
1006*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
1007*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x16__avx_broadcast(benchmark::State & state,const char * net)1008*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x16__avx_broadcast(benchmark::State& state, const char* net) {
1009*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast, 5, 16, 1, 1,
1010*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
1011*4bdc9457SAndroid Build Coastguard Worker   }
1012*4bdc9457SAndroid Build Coastguard Worker 
f32_gemm_1x8__sse2_dup(benchmark::State & state,const char * net)1013*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__sse2_dup(benchmark::State& state, const char* net) {
1014*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, 1, 8, 1, 1,
1015*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1016*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__sse2_dup(benchmark::State & state,const char * net)1017*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__sse2_dup(benchmark::State& state, const char* net) {
1018*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup, 3, 8, 1, 1,
1019*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1020*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__sse2_dup(benchmark::State & state,const char * net)1021*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__sse2_dup(benchmark::State& state, const char* net) {
1022*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup, 4, 8, 1, 1,
1023*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1024*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__sse2_dup(benchmark::State & state,const char * net)1025*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__sse2_dup(benchmark::State& state, const char* net) {
1026*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, 5, 8, 1, 1,
1027*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1028*4bdc9457SAndroid Build Coastguard Worker   }
1029*4bdc9457SAndroid Build Coastguard Worker 
f32_gemm_1x8__sse_load1(benchmark::State & state,const char * net)1030*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__sse_load1(benchmark::State& state, const char* net) {
1031*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__sse_load1, 1, 8, 1, 1,
1032*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1033*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__sse_load1(benchmark::State & state,const char * net)1034*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__sse_load1(benchmark::State& state, const char* net) {
1035*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, 3, 8, 1, 1,
1036*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1037*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__sse_load1(benchmark::State & state,const char * net)1038*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__sse_load1(benchmark::State& state, const char* net) {
1039*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, 4, 8, 1, 1,
1040*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1041*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__sse_load1(benchmark::State & state,const char * net)1042*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__sse_load1(benchmark::State& state, const char* net) {
1043*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, 5, 8, 1, 1,
1044*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1045*4bdc9457SAndroid Build Coastguard Worker   }
1046*4bdc9457SAndroid Build Coastguard Worker 
f32_gemm_1x8__sse_dup(benchmark::State & state,const char * net)1047*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8__sse_dup(benchmark::State& state, const char* net) {
1048*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__sse_dup, 1, 8, 1, 1,
1049*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1050*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__sse_dup(benchmark::State & state,const char * net)1051*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__sse_dup(benchmark::State& state, const char* net) {
1052*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, 3, 8, 1, 1,
1053*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1054*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__sse_dup(benchmark::State & state,const char * net)1055*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__sse_dup(benchmark::State& state, const char* net) {
1056*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__sse_dup, 4, 8, 1, 1,
1057*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1058*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__sse_dup(benchmark::State & state,const char * net)1059*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__sse_dup(benchmark::State& state, const char* net) {
1060*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, 5, 8, 1, 1,
1061*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1062*4bdc9457SAndroid Build Coastguard Worker   }
1063*4bdc9457SAndroid Build Coastguard Worker 
f32_gemm_1x8s4__sse(benchmark::State & state,const char * net)1064*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_1x8s4__sse(benchmark::State& state, const char* net) {
1065*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8s4__sse, 1, 8, 1, 4,
1066*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1067*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8s4__sse(benchmark::State & state,const char * net)1068*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8s4__sse(benchmark::State& state, const char* net) {
1069*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__sse, 3, 8, 1, 4,
1070*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1071*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8s4__sse(benchmark::State & state,const char * net)1072*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8s4__sse(benchmark::State& state, const char* net) {
1073*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__sse, 4, 8, 1, 4,
1074*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1075*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8s4__sse(benchmark::State & state,const char * net)1076*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8s4__sse(benchmark::State& state, const char* net) {
1077*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__sse, 5, 8, 1, 4,
1078*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1079*4bdc9457SAndroid Build Coastguard Worker   }
1080*4bdc9457SAndroid Build Coastguard Worker 
f32_ppmm_4x8_unipass__sse(benchmark::State & state,const char * net)1081*4bdc9457SAndroid Build Coastguard Worker   static void f32_ppmm_4x8_unipass__sse(benchmark::State& state, const char* net) {
1082*4bdc9457SAndroid Build Coastguard Worker     PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_x32_packx_ukernel_4x__sse, 4, 8,
1083*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1084*4bdc9457SAndroid Build Coastguard Worker   }
f32_ppmm_4x8_twopass__sse(benchmark::State & state,const char * net)1085*4bdc9457SAndroid Build Coastguard Worker   static void f32_ppmm_4x8_twopass__sse(benchmark::State& state, const char* net) {
1086*4bdc9457SAndroid Build Coastguard Worker     PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_x32_packx_ukernel_4x__sse, 4, 8,
1087*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_sse_params);
1088*4bdc9457SAndroid Build Coastguard Worker   }
1089*4bdc9457SAndroid Build Coastguard Worker 
1090*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x16__avx512f_broadcast)
BENCHMARK_GEMM(f32_gemm_4x16__avx512f_broadcast)1091*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x16__avx512f_broadcast)
1092*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x16__avx512f_broadcast)
1093*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x16__avx512f_broadcast)
1094*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_7x16__avx512f_broadcast)
1095*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_8x16__avx512f_broadcast)
1096*4bdc9457SAndroid Build Coastguard Worker 
1097*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__fma3_broadcast)
1098*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__fma3_broadcast)
1099*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__fma3_broadcast)
1100*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__fma3_broadcast)
1101*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_7x8__fma3_broadcast)
1102*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_8x8__fma3_broadcast)
1103*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x16__fma3_broadcast)
1104*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x16__fma3_broadcast)
1105*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x16__fma3_broadcast)
1106*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x16__fma3_broadcast)
1107*4bdc9457SAndroid Build Coastguard Worker 
1108*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x16s4__fma3_broadcast)
1109*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x16s4__fma3_broadcast)
1110*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x16s4__fma3_broadcast)
1111*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x16s4__fma3_broadcast)
1112*4bdc9457SAndroid Build Coastguard Worker 
1113*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__avx_broadcast)
1114*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__avx_broadcast)
1115*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__avx_broadcast)
1116*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__avx_broadcast)
1117*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_7x8__avx_broadcast)
1118*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x16__avx_broadcast)
1119*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x16__avx_broadcast)
1120*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x16__avx_broadcast)
1121*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x16__avx_broadcast)
1122*4bdc9457SAndroid Build Coastguard Worker 
1123*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__sse2_dup)
1124*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__sse2_dup)
1125*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__sse2_dup)
1126*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__sse2_dup)
1127*4bdc9457SAndroid Build Coastguard Worker 
1128*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__sse_load1)
1129*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__sse_load1)
1130*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__sse_load1)
1131*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__sse_load1)
1132*4bdc9457SAndroid Build Coastguard Worker 
1133*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8__sse_dup)
1134*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__sse_dup)
1135*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__sse_dup)
1136*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__sse_dup)
1137*4bdc9457SAndroid Build Coastguard Worker 
1138*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_1x8s4__sse)
1139*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8s4__sse)
1140*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8s4__sse)
1141*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8s4__sse)
1142*4bdc9457SAndroid Build Coastguard Worker 
1143*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_ppmm_4x8_unipass__sse)
1144*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_ppmm_4x8_twopass__sse)
1145*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1146*4bdc9457SAndroid Build Coastguard Worker 
1147*4bdc9457SAndroid Build Coastguard Worker 
1148*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMRELAXEDSIMD
1149*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, const char* net) {
1150*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, 3, 8, 1, 1,
1151*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1152*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__wasmrelaxedsimd_loadsplat(benchmark::State & state,const char * net)1153*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, const char* net) {
1154*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, 4, 8, 1, 1,
1155*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1156*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__wasmrelaxedsimd_loadsplat(benchmark::State & state,const char * net)1157*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, const char* net) {
1158*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat, 5, 8, 1, 1,
1159*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1160*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__wasmrelaxedsimd_loadsplat(benchmark::State & state,const char * net)1161*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, const char* net) {
1162*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, 6, 8, 1, 1,
1163*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1164*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State & state,const char * net)1165*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, const char* net) {
1166*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, 3, 8, 1, 1,
1167*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1168*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State & state,const char * net)1169*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, const char* net) {
1170*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, 4, 8, 1, 1,
1171*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1172*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State & state,const char * net)1173*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, const char* net) {
1174*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, 5, 8, 1, 1,
1175*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1176*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State & state,const char * net)1177*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, const char* net) {
1178*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat, 6, 8, 1, 1,
1179*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1180*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__wasmrelaxedsimd_splat(benchmark::State & state,const char * net)1181*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__wasmrelaxedsimd_splat(benchmark::State& state, const char* net) {
1182*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat, 3, 8, 1, 1,
1183*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1184*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__wasmrelaxedsimd_splat(benchmark::State & state,const char * net)1185*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__wasmrelaxedsimd_splat(benchmark::State& state, const char* net) {
1186*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, 4, 8, 1, 1,
1187*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1188*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__wasmrelaxedsimd_splat(benchmark::State & state,const char * net)1189*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__wasmrelaxedsimd_splat(benchmark::State& state, const char* net) {
1190*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat, 5, 8, 1, 1,
1191*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1192*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__wasmrelaxedsimd_splat(benchmark::State & state,const char * net)1193*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__wasmrelaxedsimd_splat(benchmark::State& state, const char* net) {
1194*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat, 6, 8, 1, 1,
1195*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1196*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__wasmrelaxedsimd_fma_splat(benchmark::State & state,const char * net)1197*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, const char* net) {
1198*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, 3, 8, 1, 1,
1199*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1200*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__wasmrelaxedsimd_fma_splat(benchmark::State & state,const char * net)1201*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, const char* net) {
1202*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, 4, 8, 1, 1,
1203*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1204*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__wasmrelaxedsimd_fma_splat(benchmark::State & state,const char * net)1205*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, const char* net) {
1206*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, 5, 8, 1, 1,
1207*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1208*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__wasmrelaxedsimd_fma_splat(benchmark::State & state,const char * net)1209*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, const char* net) {
1210*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat, 6, 8, 1, 1,
1211*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1212*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8s4__wasmrelaxedsimd(benchmark::State & state,const char * net)1213*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8s4__wasmrelaxedsimd(benchmark::State& state, const char* net) {
1214*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, 3, 8, 1, 4,
1215*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1216*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8s4__wasmrelaxedsimd(benchmark::State & state,const char * net)1217*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8s4__wasmrelaxedsimd(benchmark::State& state, const char* net) {
1218*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd, 4, 8, 1, 4,
1219*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1220*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8s4__wasmrelaxedsimd(benchmark::State & state,const char * net)1221*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8s4__wasmrelaxedsimd(benchmark::State& state, const char* net) {
1222*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd, 5, 8, 1, 4,
1223*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1224*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8s4__wasmrelaxedsimd(benchmark::State & state,const char * net)1225*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8s4__wasmrelaxedsimd(benchmark::State& state, const char* net) {
1226*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, 6, 8, 1, 4,
1227*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1228*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8s4__wasmrelaxedsimd_fma(benchmark::State & state,const char * net)1229*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8s4__wasmrelaxedsimd_fma(benchmark::State& state, const char* net) {
1230*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, 3, 8, 1, 4,
1231*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1232*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8s4__wasmrelaxedsimd_fma(benchmark::State & state,const char * net)1233*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8s4__wasmrelaxedsimd_fma(benchmark::State& state, const char* net) {
1234*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, 4, 8, 1, 4,
1235*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1236*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8s4__wasmrelaxedsimd_fma(benchmark::State & state,const char * net)1237*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8s4__wasmrelaxedsimd_fma(benchmark::State& state, const char* net) {
1238*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma, 5, 8, 1, 4,
1239*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1240*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8s4__wasmrelaxedsimd_fma(benchmark::State & state,const char * net)1241*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8s4__wasmrelaxedsimd_fma(benchmark::State& state, const char* net) {
1242*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, 6, 8, 1, 4,
1243*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1244*4bdc9457SAndroid Build Coastguard Worker   }
1245*4bdc9457SAndroid Build Coastguard Worker 
1246*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__wasmrelaxedsimd_loadsplat)
BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_loadsplat)1247*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_loadsplat)
1248*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__wasmrelaxedsimd_loadsplat)
1249*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__wasmrelaxedsimd_loadsplat)
1250*4bdc9457SAndroid Build Coastguard Worker 
1251*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__wasmrelaxedsimd_fma_loadsplat)
1252*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_fma_loadsplat)
1253*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__wasmrelaxedsimd_fma_loadsplat)
1254*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__wasmrelaxedsimd_fma_loadsplat)
1255*4bdc9457SAndroid Build Coastguard Worker 
1256*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__wasmrelaxedsimd_splat)
1257*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_splat)
1258*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__wasmrelaxedsimd_splat)
1259*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__wasmrelaxedsimd_splat)
1260*4bdc9457SAndroid Build Coastguard Worker 
1261*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__wasmrelaxedsimd_fma_splat)
1262*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_fma_splat)
1263*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__wasmrelaxedsimd_fma_splat)
1264*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__wasmrelaxedsimd_fma_splat)
1265*4bdc9457SAndroid Build Coastguard Worker 
1266*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8s4__wasmrelaxedsimd)
1267*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8s4__wasmrelaxedsimd)
1268*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8s4__wasmrelaxedsimd)
1269*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8s4__wasmrelaxedsimd)
1270*4bdc9457SAndroid Build Coastguard Worker 
1271*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8s4__wasmrelaxedsimd_fma)
1272*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8s4__wasmrelaxedsimd_fma)
1273*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8s4__wasmrelaxedsimd_fma)
1274*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8s4__wasmrelaxedsimd_fma)
1275*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_WASMRELAXEDSIMD
1276*4bdc9457SAndroid Build Coastguard Worker 
1277*4bdc9457SAndroid Build Coastguard Worker 
1278*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1279*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
1280*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, 3, 8, 1, 1,
1281*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1282*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__wasmsimd_arm_loadsplat(benchmark::State & state,const char * net)1283*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
1284*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, 4, 8, 1, 1,
1285*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1286*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__wasmsimd_arm_loadsplat(benchmark::State & state,const char * net)1287*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
1288*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, 5, 8, 1, 1,
1289*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1290*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__wasmsimd_arm_loadsplat(benchmark::State & state,const char * net)1291*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
1292*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, 6, 8, 1, 1,
1293*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1294*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__wasmsimd_x86_loadsplat(benchmark::State & state,const char * net)1295*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
1296*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, 3, 8, 1, 1,
1297*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1298*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__wasmsimd_x86_loadsplat(benchmark::State & state,const char * net)1299*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
1300*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, 4, 8, 1, 1,
1301*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1302*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__wasmsimd_x86_loadsplat(benchmark::State & state,const char * net)1303*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
1304*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, 5, 8, 1, 1,
1305*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1306*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__wasmsimd_x86_loadsplat(benchmark::State & state,const char * net)1307*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
1308*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, 6, 8, 1, 1,
1309*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1310*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__wasmsimd_arm_splat(benchmark::State & state,const char * net)1311*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
1312*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat, 3, 8, 1, 1,
1313*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1314*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__wasmsimd_arm_splat(benchmark::State & state,const char * net)1315*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
1316*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat, 4, 8, 1, 1,
1317*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1318*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__wasmsimd_arm_splat(benchmark::State & state,const char * net)1319*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
1320*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat, 5, 8, 1, 1,
1321*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1322*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__wasmsimd_arm_splat(benchmark::State & state,const char * net)1323*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
1324*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, 6, 8, 1, 1,
1325*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1326*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8__wasmsimd_x86_splat(benchmark::State & state,const char * net)1327*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
1328*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat, 3, 8, 1, 1,
1329*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1330*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8__wasmsimd_x86_splat(benchmark::State & state,const char * net)1331*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
1332*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat, 4, 8, 1, 1,
1333*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1334*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8__wasmsimd_x86_splat(benchmark::State & state,const char * net)1335*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
1336*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat, 5, 8, 1, 1,
1337*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1338*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8__wasmsimd_x86_splat(benchmark::State & state,const char * net)1339*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
1340*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, 6, 8, 1, 1,
1341*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1342*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8s4__wasmsimd_arm(benchmark::State & state,const char * net)1343*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
1344*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, 3, 8, 1, 4,
1345*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1346*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8s4__wasmsimd_arm(benchmark::State & state,const char * net)1347*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
1348*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm, 4, 8, 1, 4,
1349*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1350*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8s4__wasmsimd_arm(benchmark::State & state,const char * net)1351*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
1352*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm, 5, 8, 1, 4,
1353*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1354*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8s4__wasmsimd_arm(benchmark::State & state,const char * net)1355*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
1356*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, 6, 8, 1, 4,
1357*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1358*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_3x8s4__wasmsimd_x86(benchmark::State & state,const char * net)1359*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_3x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
1360*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, 3, 8, 1, 4,
1361*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1362*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_4x8s4__wasmsimd_x86(benchmark::State & state,const char * net)1363*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_4x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
1364*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86, 4, 8, 1, 4,
1365*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1366*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_5x8s4__wasmsimd_x86(benchmark::State & state,const char * net)1367*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_5x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
1368*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86, 5, 8, 1, 4,
1369*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1370*4bdc9457SAndroid Build Coastguard Worker   }
f32_gemm_6x8s4__wasmsimd_x86(benchmark::State & state,const char * net)1371*4bdc9457SAndroid Build Coastguard Worker   static void f32_gemm_6x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
1372*4bdc9457SAndroid Build Coastguard Worker     GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, 6, 8, 1, 4,
1373*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1374*4bdc9457SAndroid Build Coastguard Worker   }
1375*4bdc9457SAndroid Build Coastguard Worker 
f32_ppmm_4x8_unipass__wasmsimd_arm_splat(benchmark::State & state,const char * net)1376*4bdc9457SAndroid Build Coastguard Worker   static void f32_ppmm_4x8_unipass__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
1377*4bdc9457SAndroid Build Coastguard Worker     PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_x32_packx_ukernel_4x__wasmsimd, 4, 8,
1378*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1379*4bdc9457SAndroid Build Coastguard Worker   }
f32_ppmm_4x8_unipass__wasmsimd_x86_splat(benchmark::State & state,const char * net)1380*4bdc9457SAndroid Build Coastguard Worker   static void f32_ppmm_4x8_unipass__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
1381*4bdc9457SAndroid Build Coastguard Worker     PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_x32_packx_ukernel_4x__wasmsimd, 4, 8,
1382*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1383*4bdc9457SAndroid Build Coastguard Worker   }
1384*4bdc9457SAndroid Build Coastguard Worker 
f32_ppmm_4x8_twopass__wasmsimd_arm_splat(benchmark::State & state,const char * net)1385*4bdc9457SAndroid Build Coastguard Worker   static void f32_ppmm_4x8_twopass__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
1386*4bdc9457SAndroid Build Coastguard Worker     PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_x32_packx_ukernel_4x__wasmsimd, 4, 8,
1387*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1388*4bdc9457SAndroid Build Coastguard Worker   }
f32_ppmm_4x8_twopass__wasmsimd_x86_splat(benchmark::State & state,const char * net)1389*4bdc9457SAndroid Build Coastguard Worker   static void f32_ppmm_4x8_twopass__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
1390*4bdc9457SAndroid Build Coastguard Worker     PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_x32_packx_ukernel_4x__wasmsimd, 4, 8,
1391*4bdc9457SAndroid Build Coastguard Worker       xnn_init_f32_minmax_wasmsimd_params);
1392*4bdc9457SAndroid Build Coastguard Worker   }
1393*4bdc9457SAndroid Build Coastguard Worker 
1394*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__wasmsimd_arm_loadsplat)
BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_arm_loadsplat)1395*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_arm_loadsplat)
1396*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__wasmsimd_arm_loadsplat)
1397*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__wasmsimd_arm_loadsplat)
1398*4bdc9457SAndroid Build Coastguard Worker 
1399*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__wasmsimd_x86_loadsplat)
1400*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_x86_loadsplat)
1401*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__wasmsimd_x86_loadsplat)
1402*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__wasmsimd_x86_loadsplat)
1403*4bdc9457SAndroid Build Coastguard Worker 
1404*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__wasmsimd_arm_splat)
1405*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_arm_splat)
1406*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__wasmsimd_arm_splat)
1407*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__wasmsimd_arm_splat)
1408*4bdc9457SAndroid Build Coastguard Worker 
1409*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8__wasmsimd_x86_splat)
1410*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_x86_splat)
1411*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8__wasmsimd_x86_splat)
1412*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8__wasmsimd_x86_splat)
1413*4bdc9457SAndroid Build Coastguard Worker 
1414*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8s4__wasmsimd_arm)
1415*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8s4__wasmsimd_arm)
1416*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8s4__wasmsimd_arm)
1417*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8s4__wasmsimd_arm)
1418*4bdc9457SAndroid Build Coastguard Worker 
1419*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_3x8s4__wasmsimd_x86)
1420*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_4x8s4__wasmsimd_x86)
1421*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_5x8s4__wasmsimd_x86)
1422*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_gemm_6x8s4__wasmsimd_x86)
1423*4bdc9457SAndroid Build Coastguard Worker 
1424*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_ppmm_4x8_unipass__wasmsimd_arm_splat)
1425*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_ppmm_4x8_unipass__wasmsimd_x86_splat)
1426*4bdc9457SAndroid Build Coastguard Worker 
1427*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_ppmm_4x8_twopass__wasmsimd_arm_splat)
1428*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_GEMM(f32_ppmm_4x8_twopass__wasmsimd_x86_splat)
1429*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1430*4bdc9457SAndroid Build Coastguard Worker 
1431*4bdc9457SAndroid Build Coastguard Worker 
1432*4bdc9457SAndroid Build Coastguard Worker static void f32_gemm_1x4__scalar(benchmark::State& state, const char* net) {
1433*4bdc9457SAndroid Build Coastguard Worker   GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x4__scalar, 1, 4, 1, 1,
1434*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1435*4bdc9457SAndroid Build Coastguard Worker }
f32_gemm_2x4__scalar(benchmark::State & state,const char * net)1436*4bdc9457SAndroid Build Coastguard Worker static void f32_gemm_2x4__scalar(benchmark::State& state, const char* net) {
1437*4bdc9457SAndroid Build Coastguard Worker   GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_2x4__scalar, 2, 4, 1, 1,
1438*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1439*4bdc9457SAndroid Build Coastguard Worker }
f32_gemm_4x4__scalar(benchmark::State & state,const char * net)1440*4bdc9457SAndroid Build Coastguard Worker static void f32_gemm_4x4__scalar(benchmark::State& state, const char* net) {
1441*4bdc9457SAndroid Build Coastguard Worker   GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x4__scalar, 4, 4, 1, 1,
1442*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1443*4bdc9457SAndroid Build Coastguard Worker }
1444*4bdc9457SAndroid Build Coastguard Worker 
f32_ppmm_2x4_unipass__scalar(benchmark::State & state,const char * net)1445*4bdc9457SAndroid Build Coastguard Worker static void f32_ppmm_2x4_unipass__scalar(benchmark::State& state, const char* net) {
1446*4bdc9457SAndroid Build Coastguard Worker   PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_x32_packx_ukernel_2x__scalar, 2, 4,
1447*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1448*4bdc9457SAndroid Build Coastguard Worker }
f32_ppmm_4x2_unipass__scalar(benchmark::State & state,const char * net)1449*4bdc9457SAndroid Build Coastguard Worker static void f32_ppmm_4x2_unipass__scalar(benchmark::State& state, const char* net) {
1450*4bdc9457SAndroid Build Coastguard Worker   PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_x32_packx_ukernel_4x__scalar, 4, 2,
1451*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1452*4bdc9457SAndroid Build Coastguard Worker }
f32_ppmm_4x4_unipass__scalar(benchmark::State & state,const char * net)1453*4bdc9457SAndroid Build Coastguard Worker static void f32_ppmm_4x4_unipass__scalar(benchmark::State& state, const char* net) {
1454*4bdc9457SAndroid Build Coastguard Worker   PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_x32_packx_ukernel_4x__scalar, 4, 4,
1455*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1456*4bdc9457SAndroid Build Coastguard Worker }
f32_ppmm_3x3_unipass__scalar(benchmark::State & state,const char * net)1457*4bdc9457SAndroid Build Coastguard Worker static void f32_ppmm_3x3_unipass__scalar(benchmark::State& state, const char* net) {
1458*4bdc9457SAndroid Build Coastguard Worker   PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_x32_packx_ukernel_3x__scalar, 3, 3,
1459*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1460*4bdc9457SAndroid Build Coastguard Worker }
1461*4bdc9457SAndroid Build Coastguard Worker 
f32_ppmm_2x4_twopass__scalar(benchmark::State & state,const char * net)1462*4bdc9457SAndroid Build Coastguard Worker static void f32_ppmm_2x4_twopass__scalar(benchmark::State& state, const char* net) {
1463*4bdc9457SAndroid Build Coastguard Worker   PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_x32_packx_ukernel_2x__scalar, 2, 4,
1464*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1465*4bdc9457SAndroid Build Coastguard Worker }
f32_ppmm_4x2_twopass__scalar(benchmark::State & state,const char * net)1466*4bdc9457SAndroid Build Coastguard Worker static void f32_ppmm_4x2_twopass__scalar(benchmark::State& state, const char* net) {
1467*4bdc9457SAndroid Build Coastguard Worker   PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_x32_packx_ukernel_4x__scalar, 4, 2,
1468*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1469*4bdc9457SAndroid Build Coastguard Worker }
f32_ppmm_4x4_twopass__scalar(benchmark::State & state,const char * net)1470*4bdc9457SAndroid Build Coastguard Worker static void f32_ppmm_4x4_twopass__scalar(benchmark::State& state, const char* net) {
1471*4bdc9457SAndroid Build Coastguard Worker   PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_x32_packx_ukernel_4x__scalar, 4, 4,
1472*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1473*4bdc9457SAndroid Build Coastguard Worker }
f32_ppmm_3x3_twopass__scalar(benchmark::State & state,const char * net)1474*4bdc9457SAndroid Build Coastguard Worker static void f32_ppmm_3x3_twopass__scalar(benchmark::State& state, const char* net) {
1475*4bdc9457SAndroid Build Coastguard Worker   PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_x32_packx_ukernel_3x__scalar, 3, 3,
1476*4bdc9457SAndroid Build Coastguard Worker     xnn_init_f32_minmax_scalar_params);
1477*4bdc9457SAndroid Build Coastguard Worker }
1478*4bdc9457SAndroid Build Coastguard Worker 
1479*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_gemm_1x4__scalar)
1480*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_gemm_2x4__scalar)
1481*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_gemm_4x4__scalar)
1482*4bdc9457SAndroid Build Coastguard Worker 
1483*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_ppmm_2x4_unipass__scalar)
1484*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_ppmm_4x2_unipass__scalar)
1485*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_ppmm_4x4_unipass__scalar)
1486*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_ppmm_3x3_unipass__scalar)
1487*4bdc9457SAndroid Build Coastguard Worker 
1488*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_ppmm_2x4_twopass__scalar)
1489*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_ppmm_4x2_twopass__scalar)
1490*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_ppmm_4x4_twopass__scalar)
1491*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_GEMM(f32_ppmm_3x3_twopass__scalar)
1492*4bdc9457SAndroid Build Coastguard Worker 
1493*4bdc9457SAndroid Build Coastguard Worker 
// Register the ruy_st benchmark (defined outside this part of the file) only when
// the build defines BENCHMARK_RUY.
#ifdef BENCHMARK_RUY
BENCHMARK_GEMM(ruy_st)
#endif  // BENCHMARK_RUY
1497*4bdc9457SAndroid Build Coastguard Worker 
1498*4bdc9457SAndroid Build Coastguard Worker #ifndef XNNPACK_BENCHMARK_NO_MAIN
1499*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_MAIN();
1500*4bdc9457SAndroid Build Coastguard Worker #endif
1501