// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <functional>
#include <random>
#include <vector>

#include <benchmark/benchmark.h>
#include "bench/spmm.h"
#include "bench/utils.h"

#include <xnnpack.h>
#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/spmm.h>


static void f32_spmm(benchmark::State& state,
  xnn_f32_spmm_minmax_ukernel_function spmm, uint32_t mr, uint32_t nr, float sparsity,
  xnn_init_f32_minmax_params_fn init_params,
  benchmark::utils::IsaCheckFunction isa_check = nullptr)
{
  if (isa_check && !isa_check(state)) {
    return;
  }

  const size_t mc = state.range(0);
  const size_t nc = state.range(1);
  const size_t kc = state.range(2);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));

  // If using blocks, generate the reduced matrix first and then extrude along
  // the block dimension (n) to get the full matrix.
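  // ncols counts the full nr-wide blocks plus the remainder columns that are handled individually.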
  size_t ncols = nc / nr + nc % nr;
  std::vector<float> b(ncols * kc);
  std::vector<float> bias(nc);
  std::vector<float> w;
  std::vector<uint32_t> nmap;
  std::vector<int32_t> dmap;
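  // Encoding produced below: for each output-channel block, w holds the bias value(s) followed by
  // the non-zero weights, nmap holds the per-block count of non-zeros, and dmap holds the byte
  // increment applied to the input pointer when advancing from one non-zero to the next.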
  const size_t sparse_end = std::min(size_t(float(b.size()) * sparsity), b.size());
  const size_t num_nonzeroes = nr * (b.size() - sparse_end);

  const size_t w_elements = num_nonzeroes + nc;
  const size_t c_elements = mc * nc;
  const size_t dmap_elements = num_nonzeroes / nr;
  const size_t nmap_elements = nc;
  const size_t num_buffers = 1 +
    benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
      sizeof(float) * (w_elements + c_elements) + sizeof(uint32_t) * (dmap_elements + nmap_elements));
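  // Use enough rotating copies of the W/Dmap/Nmap/C buffers that their combined footprint exceeds
  // the last-level cache, so every benchmark iteration runs with cold weights and outputs.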

  // Micro-kernel can access one element beyond w and dmap for software pipelining.
  w.reserve(num_buffers * w_elements + 1);
  dmap.reserve(num_buffers * dmap_elements + 1);
  nmap.resize(num_buffers * nmap_elements);

  std::vector<size_t> a_offsets(num_buffers);

  for (size_t buffer_index = 0; buffer_index < num_buffers; buffer_index++) {
    // Re-generate weights. Note: each re-generation produces the same number of non-zeroes.
    std::fill(b.begin(), b.begin() + sparse_end, 0.0f);
    std::generate(b.begin() + sparse_end, b.end(), std::ref(f32rng));
    std::shuffle(b.begin(), b.end(), rng);
    std::generate(bias.begin(), bias.end(), std::ref(f32rng));

    uint32_t first_j = 0, last_j = 0;
    bool is_first_nonzero = true;
    for (uint32_t i = 0; i < nc / nr; i++) {
      for (uint32_t n = 0; n < nr; n++)
        w.push_back(bias[nr * i + n]);
      for (uint32_t j = 0; j < kc; j++) {
        if (b[i * kc + j] != 0.0f) {
          for (size_t l = 0; l < nr; l++)
            w.push_back(b[i * kc + j] + static_cast<float>(i));
          if (is_first_nonzero) {
            first_j = j;
          } else {
            const ptrdiff_t increment = int32_t(j - last_j) * int32_t(mc) * int32_t(sizeof(float));
            dmap.push_back(increment);
          }
          last_j = j;
          is_first_nonzero = false;
          nmap[buffer_index * nmap_elements + i] += 1;
        }
      }
    }
    for (uint32_t i = nc / nr; i < ncols; i++) {
      w.push_back(bias[i]);
      for (uint32_t j = 0; j < kc; j++) {
        if (b[i * kc + j] != 0.0f) {
          w.push_back(b[i * kc + j]);
          if (is_first_nonzero) {
            first_j = j;
          } else {
            const ptrdiff_t increment = int32_t(j - last_j) * int32_t(mc) * int32_t(sizeof(float));
            dmap.push_back(increment);
          }
          last_j = j;
          is_first_nonzero = false;
          nmap[buffer_index * nmap_elements + i] += 1;
        }
      }
    }
    {
      const ptrdiff_t increment = int32_t(first_j - last_j) * int32_t(mc) * int32_t(sizeof(float));
      dmap.push_back(increment);
    }
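    // first_j is the column of the first non-zero weight; the kernel starts reading A at that
    // column, and the increments stored in dmap are relative to this starting position.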
    a_offsets[buffer_index] = first_j * mc;
  }

  // Micro-kernel can access one element beyond w and dmap for software pipelining.
  w.resize(w.size() + 1);
  dmap.resize(dmap.size() + 1);

  std::vector<float, AlignedAllocator<float, 64>> a(kc * mc);
  std::vector<float, AlignedAllocator<float, 64>> c(num_buffers * c_elements);

  std::generate(a.begin(), a.end(), std::ref(f32rng));
  std::fill(c.begin(), c.end(), nanf(""));
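  // Note: filling C with NaNs touches the output pages up front and makes any element the
  // micro-kernel leaves unwritten easy to spot.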

  xnn_f32_minmax_params params;
  init_params(&params, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
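  // With the limits set to -infinity/+infinity, the min/max clamping is effectively a no-op
  // in this benchmark.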

  size_t buffer_index = 0;
  for (auto _ : state) {
    // Use circular buffers (exceeding cache size) and prefetch to control cache state:
    // - A is always in L1 cache (if it fits; otherwise L2, L3, etc.)
    // - W, Dmap, and Nmap are not in cache (for any cache level)
    // - C is not in cache (for any cache level)
    state.PauseTiming();
    benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
    buffer_index = (buffer_index + 1) % num_buffers;
    state.ResumeTiming();
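    // The M extent and the output stride are both passed in bytes (mc * sizeof(float)). A is
    // offset to the column of the first non-zero weight; W, Dmap, Nmap, and C are rotated per
    // buffer so they stay cold.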

    spmm(mc * sizeof(float), nc,
      a.data() + a_offsets[buffer_index],
      w.data() + buffer_index * w_elements,
      dmap.data() + buffer_index * dmap_elements,
      nmap.data() + buffer_index * nmap_elements,
      c.data() + buffer_index * c_elements, mc * sizeof(float),
      &params);
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }
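  // FLOPS counts the multiply-adds actually performed (2 per non-zero weight per row of A);
  // EffFLOPS counts the 2 * mc * nc * kc operations of the equivalent dense computation, i.e.
  // the effective dense throughput achieved by the sparse micro-kernel.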

  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 * mc * num_nonzeroes, benchmark::Counter::kIsRate);

  state.counters["EffFLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
}
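
// Each spmm80_MxN__<isa> wrapper below benchmarks the corresponding MxN micro-kernel with 80% of
// the weight blocks zeroed out (sparsity = 0.8f).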


#if XNN_ARCH_ARM64
static void spmm80_4x1__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neonfma, 4, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_4x2__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x2__neonfma, 4, 2, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_4x4__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x4__neonfma, 4, 4, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x1__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neonfma, 8, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x2__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x2__neonfma, 8, 2, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x4__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x4__neonfma, 8, 4, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_12x1__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_12x1__neonfma, 12, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_12x2__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_12x2__neonfma, 12, 2, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_12x4__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_12x4__neonfma, 12, 4, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_16x1__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neonfma, 16, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_16x2__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x2__neonfma, 16, 2, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_16x4__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x4__neonfma, 16, 4, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_32x1__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neonfma, 32, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_32x2__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x2__neonfma, 32, 2, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_32x4__neonfma(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x4__neonfma, 32, 4, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_4x1__neonfma_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2, 4, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x1__neonfma_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2, 8, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_16x1__neonfma_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2, 16, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_32x1__neonfma_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2, 32, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_4x1__neonfma_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined, 4, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x1__neonfma_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined, 8, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_16x1__neonfma_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined, 16, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_32x1__neonfma_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined, 32, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

BENCHMARK_SPMM(spmm80_4x1__neonfma)
BENCHMARK_SPMM(spmm80_4x1__neonfma_pipelined)
BENCHMARK_SPMM(spmm80_4x1__neonfma_x2)
BENCHMARK_SPMM(spmm80_4x2__neonfma)
BENCHMARK_SPMM(spmm80_4x4__neonfma)
BENCHMARK_SPMM(spmm80_8x1__neonfma)
BENCHMARK_SPMM(spmm80_8x1__neonfma_pipelined)
BENCHMARK_SPMM(spmm80_8x1__neonfma_x2)
BENCHMARK_SPMM(spmm80_8x2__neonfma)
BENCHMARK_SPMM(spmm80_8x4__neonfma)
BENCHMARK_SPMM(spmm80_12x1__neonfma)
BENCHMARK_SPMM(spmm80_12x2__neonfma)
BENCHMARK_SPMM(spmm80_12x4__neonfma)
BENCHMARK_SPMM(spmm80_16x1__neonfma)
BENCHMARK_SPMM(spmm80_16x1__neonfma_pipelined)
BENCHMARK_SPMM(spmm80_16x1__neonfma_x2)
BENCHMARK_SPMM(spmm80_16x2__neonfma)
BENCHMARK_SPMM(spmm80_16x4__neonfma)
BENCHMARK_SPMM(spmm80_32x1__neonfma)
BENCHMARK_SPMM(spmm80_32x1__neonfma_pipelined)
BENCHMARK_SPMM(spmm80_32x1__neonfma_x2)
BENCHMARK_SPMM(spmm80_32x2__neonfma)
BENCHMARK_SPMM(spmm80_32x4__neonfma)
#endif  // XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
static void spmm80_4x1__neon(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neon, 4, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_8x1__neon(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neon, 8, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_12x1__neon(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_12x1__neon, 12, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_16x1__neon(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neon, 16, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_32x1__neon(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neon, 32, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_4x1__neon_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neon_x2, 4, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_8x1__neon_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neon_x2, 8, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_16x1__neon_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neon_x2, 16, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_32x1__neon_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neon_x2, 32, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_4x1__neon_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined, 4, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_8x1__neon_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined, 8, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_16x1__neon_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined, 16, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

static void spmm80_32x1__neon_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined, 32, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
}

BENCHMARK_SPMM(spmm80_4x1__neon)
BENCHMARK_SPMM(spmm80_4x1__neon_pipelined)
BENCHMARK_SPMM(spmm80_4x1__neon_x2)
BENCHMARK_SPMM(spmm80_8x1__neon)
BENCHMARK_SPMM(spmm80_8x1__neon_pipelined)
BENCHMARK_SPMM(spmm80_8x1__neon_x2)
BENCHMARK_SPMM(spmm80_12x1__neon)
BENCHMARK_SPMM(spmm80_16x1__neon)
BENCHMARK_SPMM(spmm80_16x1__neon_pipelined)
BENCHMARK_SPMM(spmm80_16x1__neon_x2)
BENCHMARK_SPMM(spmm80_32x1__neon)
BENCHMARK_SPMM(spmm80_32x1__neon_pipelined)
BENCHMARK_SPMM(spmm80_32x1__neon_x2)
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
static void spmm80_4x1__sse(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__sse, 4, 1, 0.8f,
    xnn_init_f32_minmax_sse_params);
}

static void spmm80_8x1__sse(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__sse, 8, 1, 0.8f,
    xnn_init_f32_minmax_sse_params);
}

static void spmm80_16x1__sse(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__sse, 16, 1, 0.8f,
    xnn_init_f32_minmax_sse_params);
}

static void spmm80_32x1__sse(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__sse, 32, 1, 0.8f,
    xnn_init_f32_minmax_sse_params);
}

BENCHMARK_SPMM(spmm80_4x1__sse)
BENCHMARK_SPMM(spmm80_8x1__sse)
BENCHMARK_SPMM(spmm80_16x1__sse)
BENCHMARK_SPMM(spmm80_32x1__sse)
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
static void spmm80_4x1__wasmsimd_arm(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_arm(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_arm(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_arm(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_x86(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_x86(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_x86(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_x86(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_arm_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_arm_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_arm_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_arm_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_x86_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_x86_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_x86_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_x86_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_arm_x4(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_arm_x4(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_arm_x4(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_arm_x4(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_x86_x4(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_x86_x4(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_x86_x4(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_x86_x4(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_arm_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_arm_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_arm_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_arm_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_x86_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_x86_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_x86_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_x86_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_arm_pipelined_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_arm_pipelined_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_arm_pipelined_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_arm_pipelined_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_32x1__wasmsimd_x86_pipelined_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2, 32, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_4x1__wasmsimd_x86_pipelined_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2, 4, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_8x1__wasmsimd_x86_pipelined_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2, 8, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

static void spmm80_16x1__wasmsimd_x86_pipelined_x2(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2, 16, 1, 0.8f,
    xnn_init_f32_minmax_wasmsimd_params);
}

BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm_x2)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm_x4)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86_x2)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86_x4)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm_x2)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm_x4)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86_x2)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86_x4)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm_x2)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm_x4)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86_x2)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86_x4)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm_x2)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm_x4)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86_x2)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86_x4)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm_pipelined)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_arm_pipelined_x2)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86_pipelined)
BENCHMARK_SPMM(spmm80_4x1__wasmsimd_x86_pipelined_x2)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm_pipelined)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_arm_pipelined_x2)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86_pipelined)
BENCHMARK_SPMM(spmm80_8x1__wasmsimd_x86_pipelined_x2)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm_pipelined)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_arm_pipelined_x2)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86_pipelined)
BENCHMARK_SPMM(spmm80_16x1__wasmsimd_x86_pipelined_x2)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm_pipelined)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_arm_pipelined_x2)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86_pipelined)
BENCHMARK_SPMM(spmm80_32x1__wasmsimd_x86_pipelined_x2)
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


static void spmm80_1x1__scalar(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_1x1__scalar, 1, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_2x1__scalar(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_2x1__scalar, 2, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_4x1__scalar(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__scalar, 4, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x1__scalar(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__scalar, 8, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x2__scalar(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x2__scalar, 8, 2, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x4__scalar(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x4__scalar, 8, 4, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_1x1__scalar_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, 1, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_2x1__scalar_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, 2, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_4x1__scalar_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, 4, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

static void spmm80_8x1__scalar_pipelined(benchmark::State& state, const char* net) {
  f32_spmm(state, xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, 8, 1, 0.8f,
    xnn_init_f32_minmax_scalar_params);
}

BENCHMARK_SPMM(spmm80_1x1__scalar)
BENCHMARK_SPMM(spmm80_2x1__scalar)
BENCHMARK_SPMM(spmm80_4x1__scalar)
BENCHMARK_SPMM(spmm80_8x1__scalar)
BENCHMARK_SPMM(spmm80_8x2__scalar)
BENCHMARK_SPMM(spmm80_8x4__scalar)
BENCHMARK_SPMM(spmm80_1x1__scalar_pipelined)
BENCHMARK_SPMM(spmm80_2x1__scalar_pipelined)
BENCHMARK_SPMM(spmm80_4x1__scalar_pipelined)
BENCHMARK_SPMM(spmm80_8x1__scalar_pipelined)


#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif