xref: /aosp_15_r20/external/XNNPACK/bench/f32-spmm.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cfloat>
8 #include <cmath>
9 #include <functional>
10 #include <random>
11 #include <vector>
12 
13 #include <benchmark/benchmark.h>
14 #include "bench/spmm.h"
15 #include "bench/utils.h"
16 
17 #include <xnnpack.h>
18 #include <xnnpack/aligned-allocator.h>
19 #include <xnnpack/common.h>
20 #include <xnnpack/microfnptr.h>
21 #include <xnnpack/microparams-init.h>
22 #include <xnnpack/spmm.h>
23 
24 
f32_spmm(benchmark::State & state,xnn_f32_spmm_minmax_ukernel_function spmm,uint32_t mr,uint32_t nr,float sparsity,xnn_init_f32_minmax_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)25 static void f32_spmm(benchmark::State& state,
26   xnn_f32_spmm_minmax_ukernel_function spmm, uint32_t mr, uint32_t nr, float sparsity,
27   xnn_init_f32_minmax_params_fn init_params,
28   benchmark::utils::IsaCheckFunction isa_check = nullptr)
29 {
30   if (isa_check && !isa_check(state)) {
31     return;
32   }
33 
34   const size_t mc = state.range(0);
35   const size_t nc = state.range(1);
36   const size_t kc = state.range(2);
37 
38   std::random_device random_device;
39   auto rng = std::mt19937(random_device());
40   auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
41 
42   // if using blocks, generate the reduced matrix first and then extrude along
43   // the block dimension (n), to get the full matrix
44   size_t ncols = nc / nr + nc % nr;
45   std::vector<float> b(ncols * kc);
46   std::vector<float> bias(nc);
47   std::vector<float> w;
48   std::vector<uint32_t> nmap;
49   std::vector<int32_t> dmap;
50   const size_t sparse_end = std::min(size_t(float(b.size()) * sparsity), b.size());
51   const size_t num_nonzeroes = nr * (b.size() - sparse_end);
52 
53   const size_t w_elements = num_nonzeroes + nc;
54   const size_t c_elements = mc * nc;
55   const size_t dmap_elements = num_nonzeroes / nr;
56   const size_t nmap_elements = nc;
57   const size_t num_buffers = 1 +
58     benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
59       sizeof(float) * (w_elements + c_elements) + sizeof(uint32_t) * (dmap_elements + nmap_elements));
60 
61   // Micro-kernel can access one element beyond w and dmap for software pipelining.
62   w.reserve(num_buffers * w_elements + 1);
63   dmap.reserve(num_buffers * dmap_elements + 1);
64   nmap.resize(num_buffers * nmap_elements);
65 
66   std::vector<size_t> a_offsets(num_buffers);
67 
68   for (size_t buffer_index = 0; buffer_index < num_buffers; buffer_index++) {
69     // Re-generate weights. Note: each re-generation produces the number of non-zeroes.
70     std::fill(b.begin(), b.begin() + sparse_end, 0.0f);
71     std::generate(b.begin() + sparse_end, b.end(), std::ref(f32rng));
72     std::shuffle(b.begin(), b.end(), rng);
73     std::generate(bias.begin(), bias.end(), std::ref(f32rng));
74 
75     uint32_t first_j = 0, last_j = 0;
76     bool is_first_nonzero = true;
77     for (uint32_t i = 0; i < nc / nr; i++) {
78       for (uint32_t n = 0; n < nr; n++)
79         w.push_back(bias[nr * i + n]);
80       for (uint32_t j = 0; j < kc; j++) {
81         if (b[i * kc + j] != 0.0f) {
82           for (size_t l = 0; l < nr; l++)
83             w.push_back(b[i * kc + j] + static_cast<float>(i));
84           if (is_first_nonzero) {
85             first_j = j;
86           } else {
87             const ptrdiff_t increment = int32_t(j - last_j) * int32_t(mc) * int32_t(sizeof(float));
88             dmap.push_back(increment);
89           }
90           last_j = j;
91           is_first_nonzero = false;
92           nmap[buffer_index * nmap_elements + i] += 1;
93         }
94       }
95     }
96     for (uint32_t i = nc / nr; i < ncols; i++) {
97       w.push_back(bias[i]);
98       for (uint32_t j = 0; j < kc; j++) {
99         if (b[i * kc + j] != 0.0f) {
100           w.push_back(b[i * kc + j]);
101           if (is_first_nonzero) {
102             first_j = j;
103           } else {
104             const ptrdiff_t increment = int32_t(j - last_j) * int32_t(mc) * int32_t(sizeof(float));
105             dmap.push_back(increment);
106           }
107           last_j = j;
108           is_first_nonzero = false;
109           nmap[buffer_index * nmap_elements + i] += 1;
110         }
111       }
112     }
113     {
114       const ptrdiff_t increment = int32_t(first_j - last_j) * int32_t(mc) * int32_t(sizeof(float));
115       dmap.push_back(increment);
116     }
117 
118     a_offsets[buffer_index] = first_j * mc;
119   }
120 
121   // Micro-kernel can access one element beyond w and dmap for software pipelining.
122   w.resize(w.size() + 1);
123   dmap.resize(dmap.size() + 1);
124 
125   std::vector<float, AlignedAllocator<float, 64>> a(kc * mc);
126   std::vector<float, AlignedAllocator<float, 64>> c(num_buffers * c_elements);
127 
128   std::generate(a.begin(), a.end(), std::ref(f32rng));
129   std::fill(c.begin(), c.end(), nanf(""));
130 
131   xnn_f32_minmax_params params;
132   init_params(&params, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
133 
134   size_t buffer_index = 0;
135   for (auto _ : state) {
136     // Use circular buffers (exceeding cache size) and prefetch to control cache state:
137     // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
138     // - W, Kmap, and Nmap is not in cache (for any cache level)
139     // - C is not in cache (for any cache level)
140     state.PauseTiming();
141     benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
142     buffer_index = (buffer_index + 1) % num_buffers;
143     state.ResumeTiming();
144 
145     spmm(mc * sizeof(float), nc,
146       a.data() + a_offsets[buffer_index],
147       w.data() + buffer_index * w_elements,
148       dmap.data() + buffer_index * dmap_elements,
149       nmap.data() + buffer_index * nmap_elements,
150       c.data() + buffer_index * c_elements, mc * sizeof(float),
151       &params);
152   }
153 
154   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
155   if (cpu_frequency != 0) {
156     state.counters["cpufreq"] = cpu_frequency;
157   }
158 
159   state.counters["FLOPS"] = benchmark::Counter(
160     uint64_t(state.iterations()) * 2 * mc * num_nonzeroes, benchmark::Counter::kIsRate);
161 
162   state.counters["EffFLOPS"] = benchmark::Counter(
163     uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
164 }
165 
166 
#if XNN_ARCH_ARM64
  // AArch64 NEON-FMA SpMM micro-kernels at 80% sparsity.
  // Each wrapper's MxN suffix maps to (mr, nr); `net` is unused here and is
  // presumably consumed by the BENCHMARK_SPMM registration macro (bench/spmm.h).
  static void spmm80_4x1__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neonfma, 4, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_4x2__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x2__neonfma, 4, 2, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_4x4__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x4__neonfma, 4, 4, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_8x1__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neonfma, 8, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_8x2__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x2__neonfma, 8, 2, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_8x4__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x4__neonfma, 8, 4, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_12x1__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_12x1__neonfma, 12, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_12x2__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_12x2__neonfma, 12, 2, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_12x4__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_12x4__neonfma, 12, 4, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_16x1__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neonfma, 16, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_16x2__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x2__neonfma, 16, 2, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_16x4__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x4__neonfma, 16, 4, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_32x1__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neonfma, 32, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_32x2__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x2__neonfma, 32, 2, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_32x4__neonfma(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x4__neonfma, 32, 4, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_4x1__neonfma_x2(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, 4, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_8x1__neonfma_x2(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2, 8, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_16x1__neonfma_x2(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, 16, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_32x1__neonfma_x2(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, 32, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_4x1__neonfma_pipelined(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, 4, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_8x1__neonfma_pipelined(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined, 8, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_16x1__neonfma_pipelined(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, 16, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  static void spmm80_32x1__neonfma_pipelined(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, 32, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params);
  }

  BENCHMARK_SPMM(spmm80_4x1__neonfma)
  BENCHMARK_SPMM(spmm80_4x1__neonfma_pipelined)
  BENCHMARK_SPMM(spmm80_4x1__neonfma_x2)
  BENCHMARK_SPMM(spmm80_4x2__neonfma)
  BENCHMARK_SPMM(spmm80_4x4__neonfma)
  BENCHMARK_SPMM(spmm80_8x1__neonfma)
  BENCHMARK_SPMM(spmm80_8x1__neonfma_pipelined)
  BENCHMARK_SPMM(spmm80_8x1__neonfma_x2)
  BENCHMARK_SPMM(spmm80_8x2__neonfma)
  BENCHMARK_SPMM(spmm80_8x4__neonfma)
  BENCHMARK_SPMM(spmm80_12x1__neonfma)
  BENCHMARK_SPMM(spmm80_12x2__neonfma)
  BENCHMARK_SPMM(spmm80_12x4__neonfma)
  BENCHMARK_SPMM(spmm80_16x1__neonfma)
  BENCHMARK_SPMM(spmm80_16x1__neonfma_pipelined)
  BENCHMARK_SPMM(spmm80_16x1__neonfma_x2)
  BENCHMARK_SPMM(spmm80_16x2__neonfma)
  BENCHMARK_SPMM(spmm80_16x4__neonfma)
  BENCHMARK_SPMM(spmm80_32x1__neonfma)
  BENCHMARK_SPMM(spmm80_32x1__neonfma_pipelined)
  BENCHMARK_SPMM(spmm80_32x1__neonfma_x2)
  BENCHMARK_SPMM(spmm80_32x2__neonfma)
  BENCHMARK_SPMM(spmm80_32x4__neonfma)
#endif  // XNN_ARCH_ARM64
307 
308 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM/AArch64 NEON SpMM micro-kernels at 80% sparsity, gated on CheckNEON at runtime.
  static void spmm80_4x1__neon(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neon, 4, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_8x1__neon(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neon, 8, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_12x1__neon(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_12x1__neon, 12, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_16x1__neon(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neon, 16, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_32x1__neon(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neon, 32, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_4x1__neon_x2(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, 4, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_8x1__neon_x2(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, 8, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_16x1__neon_x2(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, 16, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_32x1__neon_x2(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, 32, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_4x1__neon_pipelined(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, 4, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_8x1__neon_pipelined(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, 8, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_16x1__neon_pipelined(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, 16, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  static void spmm80_32x1__neon_pipelined(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, 32, 1, 0.8f,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  BENCHMARK_SPMM(spmm80_4x1__neon)
  BENCHMARK_SPMM(spmm80_4x1__neon_pipelined)
  BENCHMARK_SPMM(spmm80_4x1__neon_x2)
  BENCHMARK_SPMM(spmm80_8x1__neon)
  BENCHMARK_SPMM(spmm80_8x1__neon_pipelined)
  BENCHMARK_SPMM(spmm80_8x1__neon_x2)
  BENCHMARK_SPMM(spmm80_12x1__neon)
  BENCHMARK_SPMM(spmm80_16x1__neon)
  BENCHMARK_SPMM(spmm80_16x1__neon_pipelined)
  BENCHMARK_SPMM(spmm80_16x1__neon_x2)
  BENCHMARK_SPMM(spmm80_32x1__neon)
  BENCHMARK_SPMM(spmm80_32x1__neon_pipelined)
  BENCHMARK_SPMM(spmm80_32x1__neon_x2)
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
389 
390 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86/x86-64 SSE SpMM micro-kernels at 80% sparsity (SSE is baseline; no ISA check).
  static void spmm80_4x1__sse(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__sse, 4, 1, 0.8f,
      xnn_init_f32_minmax_sse_params);
  }

  static void spmm80_8x1__sse(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__sse, 8, 1, 0.8f,
      xnn_init_f32_minmax_sse_params);
  }

  static void spmm80_16x1__sse(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__sse, 16, 1, 0.8f,
      xnn_init_f32_minmax_sse_params);
  }

  static void spmm80_32x1__sse(benchmark::State& state, const char* net) {
    f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__sse, 32, 1, 0.8f,
      xnn_init_f32_minmax_sse_params);
  }

  BENCHMARK_SPMM(spmm80_4x1__sse)
  BENCHMARK_SPMM(spmm80_8x1__sse)
  BENCHMARK_SPMM(spmm80_16x1__sse)
  BENCHMARK_SPMM(spmm80_32x1__sse)
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
417 
418 
419 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
420   static void spmm80_4x1__wasmsimd_arm(benchmark::State& state, const char* net) {
421     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, 4, 1, 0.8f,
422       xnn_init_f32_minmax_wasmsimd_params);
423   }
424 
spmm80_8x1__wasmsimd_arm(benchmark::State & state,const char * net)425   static void spmm80_8x1__wasmsimd_arm(benchmark::State& state, const char* net) {
426     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, 8, 1, 0.8f,
427       xnn_init_f32_minmax_wasmsimd_params);
428   }
429 
spmm80_16x1__wasmsimd_arm(benchmark::State & state,const char * net)430   static void spmm80_16x1__wasmsimd_arm(benchmark::State& state, const char* net) {
431     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm, 16, 1, 0.8f,
432       xnn_init_f32_minmax_wasmsimd_params);
433   }
434 
spmm80_32x1__wasmsimd_arm(benchmark::State & state,const char * net)435   static void spmm80_32x1__wasmsimd_arm(benchmark::State& state, const char* net) {
436     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, 32, 1, 0.8f,
437       xnn_init_f32_minmax_wasmsimd_params);
438   }
439 
spmm80_4x1__wasmsimd_x86(benchmark::State & state,const char * net)440   static void spmm80_4x1__wasmsimd_x86(benchmark::State& state, const char* net) {
441     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86, 4, 1, 0.8f,
442       xnn_init_f32_minmax_wasmsimd_params);
443   }
444 
spmm80_8x1__wasmsimd_x86(benchmark::State & state,const char * net)445   static void spmm80_8x1__wasmsimd_x86(benchmark::State& state, const char* net) {
446     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, 8, 1, 0.8f,
447       xnn_init_f32_minmax_wasmsimd_params);
448   }
449 
spmm80_16x1__wasmsimd_x86(benchmark::State & state,const char * net)450   static void spmm80_16x1__wasmsimd_x86(benchmark::State& state, const char* net) {
451     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86, 16, 1, 0.8f,
452       xnn_init_f32_minmax_wasmsimd_params);
453   }
454 
spmm80_32x1__wasmsimd_x86(benchmark::State & state,const char * net)455   static void spmm80_32x1__wasmsimd_x86(benchmark::State& state, const char* net) {
456     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, 32, 1, 0.8f,
457       xnn_init_f32_minmax_wasmsimd_params);
458   }
459 
spmm80_32x1__wasmsimd_arm_x2(benchmark::State & state,const char * net)460   static void spmm80_32x1__wasmsimd_arm_x2(benchmark::State& state, const char* net) {
461     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, 32, 1, 0.8f,
462       xnn_init_f32_minmax_wasmsimd_params);
463   }
464 
spmm80_4x1__wasmsimd_arm_x2(benchmark::State & state,const char * net)465   static void spmm80_4x1__wasmsimd_arm_x2(benchmark::State& state, const char* net) {
466     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2, 4, 1, 0.8f,
467       xnn_init_f32_minmax_wasmsimd_params);
468   }
469 
spmm80_8x1__wasmsimd_arm_x2(benchmark::State & state,const char * net)470   static void spmm80_8x1__wasmsimd_arm_x2(benchmark::State& state, const char* net) {
471     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, 8, 1, 0.8f,
472       xnn_init_f32_minmax_wasmsimd_params);
473   }
474 
spmm80_16x1__wasmsimd_arm_x2(benchmark::State & state,const char * net)475   static void spmm80_16x1__wasmsimd_arm_x2(benchmark::State& state, const char* net) {
476     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2, 16, 1, 0.8f,
477       xnn_init_f32_minmax_wasmsimd_params);
478   }
479 
spmm80_32x1__wasmsimd_x86_x2(benchmark::State & state,const char * net)480   static void spmm80_32x1__wasmsimd_x86_x2(benchmark::State& state, const char* net) {
481     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, 32, 1, 0.8f,
482       xnn_init_f32_minmax_wasmsimd_params);
483   }
484 
spmm80_4x1__wasmsimd_x86_x2(benchmark::State & state,const char * net)485   static void spmm80_4x1__wasmsimd_x86_x2(benchmark::State& state, const char* net) {
486     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, 4, 1, 0.8f,
487       xnn_init_f32_minmax_wasmsimd_params);
488   }
489 
spmm80_8x1__wasmsimd_x86_x2(benchmark::State & state,const char * net)490   static void spmm80_8x1__wasmsimd_x86_x2(benchmark::State& state, const char* net) {
491     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, 8, 1, 0.8f,
492       xnn_init_f32_minmax_wasmsimd_params);
493   }
494 
spmm80_16x1__wasmsimd_x86_x2(benchmark::State & state,const char * net)495   static void spmm80_16x1__wasmsimd_x86_x2(benchmark::State& state, const char* net) {
496     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2, 16, 1, 0.8f,
497       xnn_init_f32_minmax_wasmsimd_params);
498   }
499 
spmm80_32x1__wasmsimd_arm_x4(benchmark::State & state,const char * net)500   static void spmm80_32x1__wasmsimd_arm_x4(benchmark::State& state, const char* net) {
501     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, 32, 1, 0.8f,
502       xnn_init_f32_minmax_wasmsimd_params);
503   }
504 
spmm80_4x1__wasmsimd_arm_x4(benchmark::State & state,const char * net)505   static void spmm80_4x1__wasmsimd_arm_x4(benchmark::State& state, const char* net) {
506     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4, 4, 1, 0.8f,
507       xnn_init_f32_minmax_wasmsimd_params);
508   }
509 
spmm80_8x1__wasmsimd_arm_x4(benchmark::State & state,const char * net)510   static void spmm80_8x1__wasmsimd_arm_x4(benchmark::State& state, const char* net) {
511     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, 8, 1, 0.8f,
512       xnn_init_f32_minmax_wasmsimd_params);
513   }
514 
spmm80_16x1__wasmsimd_arm_x4(benchmark::State & state,const char * net)515   static void spmm80_16x1__wasmsimd_arm_x4(benchmark::State& state, const char* net) {
516     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4, 16, 1, 0.8f,
517       xnn_init_f32_minmax_wasmsimd_params);
518   }
519 
spmm80_32x1__wasmsimd_x86_x4(benchmark::State & state,const char * net)520   static void spmm80_32x1__wasmsimd_x86_x4(benchmark::State& state, const char* net) {
521     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, 32, 1, 0.8f,
522       xnn_init_f32_minmax_wasmsimd_params);
523   }
524 
spmm80_4x1__wasmsimd_x86_x4(benchmark::State & state,const char * net)525   static void spmm80_4x1__wasmsimd_x86_x4(benchmark::State& state, const char* net) {
526     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, 4, 1, 0.8f,
527       xnn_init_f32_minmax_wasmsimd_params);
528   }
529 
spmm80_8x1__wasmsimd_x86_x4(benchmark::State & state,const char * net)530   static void spmm80_8x1__wasmsimd_x86_x4(benchmark::State& state, const char* net) {
531     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4, 8, 1, 0.8f,
532       xnn_init_f32_minmax_wasmsimd_params);
533   }
534 
spmm80_16x1__wasmsimd_x86_x4(benchmark::State & state,const char * net)535   static void spmm80_16x1__wasmsimd_x86_x4(benchmark::State& state, const char* net) {
536     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4, 16, 1, 0.8f,
537       xnn_init_f32_minmax_wasmsimd_params);
538   }
spmm80_4x1__wasmsimd_arm_pipelined(benchmark::State & state,const char * net)539   static void spmm80_4x1__wasmsimd_arm_pipelined(benchmark::State& state, const char* net) {
540     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined, 4, 1, 0.8f,
541       xnn_init_f32_minmax_wasmsimd_params);
542   }
543 
spmm80_8x1__wasmsimd_arm_pipelined(benchmark::State & state,const char * net)544   static void spmm80_8x1__wasmsimd_arm_pipelined(benchmark::State& state, const char* net) {
545     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, 8, 1, 0.8f,
546       xnn_init_f32_minmax_wasmsimd_params);
547   }
548 
spmm80_16x1__wasmsimd_arm_pipelined(benchmark::State & state,const char * net)549   static void spmm80_16x1__wasmsimd_arm_pipelined(benchmark::State& state, const char* net) {
550     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined, 16, 1, 0.8f,
551       xnn_init_f32_minmax_wasmsimd_params);
552   }
553 
spmm80_32x1__wasmsimd_arm_pipelined(benchmark::State & state,const char * net)554   static void spmm80_32x1__wasmsimd_arm_pipelined(benchmark::State& state, const char* net) {
555     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, 32, 1, 0.8f,
556       xnn_init_f32_minmax_wasmsimd_params);
557   }
558 
spmm80_4x1__wasmsimd_x86_pipelined(benchmark::State & state,const char * net)559   static void spmm80_4x1__wasmsimd_x86_pipelined(benchmark::State& state, const char* net) {
560     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined, 4, 1, 0.8f,
561       xnn_init_f32_minmax_wasmsimd_params);
562   }
563 
spmm80_8x1__wasmsimd_x86_pipelined(benchmark::State & state,const char * net)564   static void spmm80_8x1__wasmsimd_x86_pipelined(benchmark::State& state, const char* net) {
565     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, 8, 1, 0.8f,
566       xnn_init_f32_minmax_wasmsimd_params);
567   }
568 
spmm80_16x1__wasmsimd_x86_pipelined(benchmark::State & state,const char * net)569   static void spmm80_16x1__wasmsimd_x86_pipelined(benchmark::State& state, const char* net) {
570     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined, 16, 1, 0.8f,
571       xnn_init_f32_minmax_wasmsimd_params);
572   }
573 
spmm80_32x1__wasmsimd_x86_pipelined(benchmark::State & state,const char * net)574   static void spmm80_32x1__wasmsimd_x86_pipelined(benchmark::State& state, const char* net) {
575     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, 32, 1, 0.8f,
576       xnn_init_f32_minmax_wasmsimd_params);
577   }
578 
spmm80_32x1__wasmsimd_arm_pipelined_x2(benchmark::State & state,const char * net)579   static void spmm80_32x1__wasmsimd_arm_pipelined_x2(benchmark::State& state, const char* net) {
580     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, 32, 1, 0.8f,
581       xnn_init_f32_minmax_wasmsimd_params);
582   }
583 
spmm80_4x1__wasmsimd_arm_pipelined_x2(benchmark::State & state,const char * net)584   static void spmm80_4x1__wasmsimd_arm_pipelined_x2(benchmark::State& state, const char* net) {
585     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2, 4, 1, 0.8f,
586       xnn_init_f32_minmax_wasmsimd_params);
587   }
588 
spmm80_8x1__wasmsimd_arm_pipelined_x2(benchmark::State & state,const char * net)589   static void spmm80_8x1__wasmsimd_arm_pipelined_x2(benchmark::State& state, const char* net) {
590     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, 8, 1, 0.8f,
591       xnn_init_f32_minmax_wasmsimd_params);
592   }
593 
spmm80_16x1__wasmsimd_arm_pipelined_x2(benchmark::State & state,const char * net)594   static void spmm80_16x1__wasmsimd_arm_pipelined_x2(benchmark::State& state, const char* net) {
595     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2, 16, 1, 0.8f,
596       xnn_init_f32_minmax_wasmsimd_params);
597   }
598 
spmm80_32x1__wasmsimd_x86_pipelined_x2(benchmark::State & state,const char * net)599   static void spmm80_32x1__wasmsimd_x86_pipelined_x2(benchmark::State& state, const char* net) {
600     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, 32, 1, 0.8f,
601       xnn_init_f32_minmax_wasmsimd_params);
602   }
603 
spmm80_4x1__wasmsimd_x86_pipelined_x2(benchmark::State & state,const char * net)604   static void spmm80_4x1__wasmsimd_x86_pipelined_x2(benchmark::State& state, const char* net) {
605     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, 4, 1, 0.8f,
606       xnn_init_f32_minmax_wasmsimd_params);
607   }
608 
spmm80_8x1__wasmsimd_x86_pipelined_x2(benchmark::State & state,const char * net)609   static void spmm80_8x1__wasmsimd_x86_pipelined_x2(benchmark::State& state, const char* net) {
610     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, 8, 1, 0.8f,
611       xnn_init_f32_minmax_wasmsimd_params);
612   }
613 
spmm80_16x1__wasmsimd_x86_pipelined_x2(benchmark::State & state,const char * net)614   static void spmm80_16x1__wasmsimd_x86_pipelined_x2(benchmark::State& state, const char* net) {
615     f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2, 16, 1, 0.8f,
616       xnn_init_f32_minmax_wasmsimd_params);
617   }
618 
  // Register every 80%-sparsity WAsm SIMD wrapper above as a benchmark entry.
  // BENCHMARK_SPMM comes from bench/spmm.h; keep this list in source order so
  // reported results stay grouped by MRxNR tile and arm/x86 variant.
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm_x2)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm_x4)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86_x2)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86_x4)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm_x2)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm_x4)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86_x2)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86_x4)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm_x2)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm_x4)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86_x2)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86_x4)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm_x2)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm_x4)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86_x2)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86_x4)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm_pipelined)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm_pipelined_x2)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86_pipelined)
  BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86_pipelined_x2)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm_pipelined)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm_pipelined_x2)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86_pipelined)
  BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86_pipelined_x2)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm_pipelined)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm_pipelined_x2)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86_pipelined)
  BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86_pipelined_x2)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm_pipelined)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm_pipelined_x2)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86_pipelined)
  BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86_pipelined_x2)
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
659 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
660 
661 
662 static void spmm80_1x1__scalar(benchmark::State& state, const char* net) {
663   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_1x1__scalar, 1, 1, 0.8f,
664     xnn_init_f32_minmax_scalar_params);
665 }
666 
spmm80_2x1__scalar(benchmark::State & state,const char * net)667 static void spmm80_2x1__scalar(benchmark::State& state, const char* net) {
668   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_2x1__scalar, 2, 1, 0.8f,
669     xnn_init_f32_minmax_scalar_params);
670 }
671 
spmm80_4x1__scalar(benchmark::State & state,const char * net)672 static void spmm80_4x1__scalar(benchmark::State& state, const char* net) {
673   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__scalar, 4, 1, 0.8f,
674     xnn_init_f32_minmax_scalar_params);
675 }
676 
spmm80_8x1__scalar(benchmark::State & state,const char * net)677 static void spmm80_8x1__scalar(benchmark::State& state, const char* net) {
678   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__scalar, 8, 1, 0.8f,
679     xnn_init_f32_minmax_scalar_params);
680 }
681 
spmm80_8x2__scalar(benchmark::State & state,const char * net)682 static void spmm80_8x2__scalar(benchmark::State& state, const char* net) {
683   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x2__scalar, 8, 2, 0.8f,
684     xnn_init_f32_minmax_scalar_params);
685 }
686 
spmm80_8x4__scalar(benchmark::State & state,const char * net)687 static void spmm80_8x4__scalar(benchmark::State& state, const char* net) {
688   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x4__scalar, 8, 4, 0.8f,
689     xnn_init_f32_minmax_scalar_params);
690 }
691 
spmm80_1x1__scalar_pipelined(benchmark::State & state,const char * net)692 static void spmm80_1x1__scalar_pipelined(benchmark::State& state, const char* net) {
693   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, 1, 1, 0.8f,
694     xnn_init_f32_minmax_scalar_params);
695 }
696 
spmm80_2x1__scalar_pipelined(benchmark::State & state,const char * net)697 static void spmm80_2x1__scalar_pipelined(benchmark::State& state, const char* net) {
698   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, 2, 1, 0.8f,
699     xnn_init_f32_minmax_scalar_params);
700 }
701 
spmm80_4x1__scalar_pipelined(benchmark::State & state,const char * net)702 static void spmm80_4x1__scalar_pipelined(benchmark::State& state, const char* net) {
703   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, 4, 1, 0.8f,
704     xnn_init_f32_minmax_scalar_params);
705 }
706 
spmm80_8x1__scalar_pipelined(benchmark::State & state,const char * net)707 static void spmm80_8x1__scalar_pipelined(benchmark::State& state, const char* net) {
708   f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, 8, 1, 0.8f,
709     xnn_init_f32_minmax_scalar_params);
710 }
711 
// Register every 80%-sparsity scalar wrapper above as a benchmark entry.
// BENCHMARK_SPMM comes from bench/spmm.h; keep this list in source order so
// reported results stay grouped by MRxNR tile.
BENCHMARK_SPMM(spmm80_1x1__scalar)
BENCHMARK_SPMM(spmm80_2x1__scalar)
BENCHMARK_SPMM(spmm80_4x1__scalar)
BENCHMARK_SPMM(spmm80_8x1__scalar)
BENCHMARK_SPMM(spmm80_8x2__scalar)
BENCHMARK_SPMM(spmm80_8x4__scalar)
BENCHMARK_SPMM(spmm80_1x1__scalar_pipelined)
BENCHMARK_SPMM(spmm80_2x1__scalar_pipelined)
BENCHMARK_SPMM(spmm80_4x1__scalar_pipelined)
BENCHMARK_SPMM(spmm80_8x1__scalar_pipelined)
722 
723 
// Emit Google Benchmark's main() unless the embedding build supplies its own
// entry point (it then defines XNNPACK_BENCHMARK_NO_MAIN to suppress this one).
#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
727