1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cmath>
8 #include <cstring>
9 #include <functional>
10 #include <random>
11 #include <vector>
12
13 #include <xnnpack.h>
14
15 #include <benchmark/benchmark.h>
16 #include "bench/end2end.h"
17 #include "bench/utils.h"
18 #include "models/models.h"
19
20 #include <xnnpack.h>
21 #include <xnnpack/dwconv.h>
22 #include <xnnpack/microfnptr.h>
23 #include <xnnpack/microparams-init.h>
24
25
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv_minmax,xnn_f32_dwconv_unipass_ukernel_function dwconv,xnn_init_f32_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)26 static void DWConvEnd2EndBenchmark(
27 benchmark::State& state,
28 models::ExecutionPlanFactory model_factory,
29 xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv_minmax,
30 xnn_f32_dwconv_unipass_ukernel_function dwconv,
31 xnn_init_f32_minmax_params_fn init_params,
32 uint8_t channel_tile, uint8_t primary_tile,
33 benchmark::utils::IsaCheckFunction isa_check = nullptr)
34 {
35 if (isa_check && !isa_check(state)) {
36 return;
37 }
38 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
39 state.SkipWithError("failed to initialize XNNPACK");
40 return;
41 }
42
43 // Override microkernels chosen in xnn_initialize
44 for (size_t i = 0; i < XNN_MAX_F32_DWCONV_UKERNELS; i++) {
45 // Replace only the microkernel with the matching kernel size.
46 if (xnn_params.f32.dwconv[i].primary_tile == primary_tile) {
47 std::memset(&xnn_params.f32.dwconv[i], 0, sizeof(xnn_params.f32.dwconv[i]));
48
49 // Note: do not directly assign to xnn_params.f32.dwconv[i] because it breaks older gcc.
50 xnn_params.f32.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv_minmax);
51 xnn_params.f32.dwconv[i].linear.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
52 xnn_params.f32.dwconv[i].channel_tile = channel_tile;
53 xnn_params.f32.dwconv[i].primary_tile = primary_tile;
54 xnn_params.f32.dwconv[i].incremental_tile = 0;
55 xnn_params.f32.dwconv[i].init.f32 = init_params;
56 break;
57 }
58 }
59
60 auto execution_plan = model_factory(nullptr);
61 if (execution_plan.empty()) {
62 state.SkipWithError("failed to create a model");
63 return;
64 }
65
66 for (auto _ : state) {
67 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
68 xnn_status status = xnn_run_operator(op.get(), nullptr);
69 if (status != xnn_status_success) {
70 state.SkipWithError("failed to run a model");
71 return;
72 }
73 }
74 }
75
76 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
77 if (cpu_frequency != 0) {
78 state.counters["cpufreq"] = cpu_frequency;
79 }
80 }
81
82 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
f32_dwconv_up4x9__aarch64_neonfma(benchmark::State & state,models::ExecutionPlanFactory model)83 static void f32_dwconv_up4x9__aarch64_neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
84 DWConvEnd2EndBenchmark(state, model,
85 xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma,
86 nullptr /* dwconv */,
87 xnn_init_f32_minmax_scalar_params,
88 4 /* channel tile */, 9 /* primary tile */);
89 }
90
f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State & state,models::ExecutionPlanFactory model)91 static void f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
92 DWConvEnd2EndBenchmark(state, model,
93 xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55,
94 nullptr /* dwconv */,
95 xnn_init_f32_minmax_scalar_params,
96 4 /* channel tile */, 9 /* primary tile */);
97 }
98
// Hand both AArch64 assembly wrappers to BENCHMARK_FP32_END2END.
// NOTE(review): the macro is not defined in this file (presumably bench/end2end.h) —
// confirm which models it registers each wrapper against.
99 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma);
100 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55);
101 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
102
103 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
f32_dwconv_up4x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)104 static void f32_dwconv_up4x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
105 DWConvEnd2EndBenchmark(state, model,
106 xnn_f32_dwconv_minmax_ukernel_up4x9__neon,
107 nullptr /* dwconv */,
108 xnn_init_f32_minmax_scalar_params,
109 4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
110 }
111
f32_dwconv_up4x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)112 static void f32_dwconv_up4x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
113 DWConvEnd2EndBenchmark(state, model,
114 xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2,
115 nullptr /* dwconv */,
116 xnn_init_f32_minmax_scalar_params,
117 4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
118 }
119
f32_dwconv_up8x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)120 static void f32_dwconv_up8x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
121 DWConvEnd2EndBenchmark(state, model,
122 xnn_f32_dwconv_minmax_ukernel_up8x9__neon,
123 nullptr /* dwconv */,
124 xnn_init_f32_minmax_scalar_params,
125 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
126 }
127
f32_dwconv_up8x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)128 static void f32_dwconv_up8x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
129 DWConvEnd2EndBenchmark(state, model,
130 xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2,
131 nullptr /* dwconv */,
132 xnn_init_f32_minmax_scalar_params,
133 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
134 }
135
f32_dwconv_up16x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)136 static void f32_dwconv_up16x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
137 DWConvEnd2EndBenchmark(state, model,
138 xnn_f32_dwconv_minmax_ukernel_up16x9__neon,
139 nullptr /* dwconv */,
140 xnn_init_f32_minmax_scalar_params,
141 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
142 }
143
f32_dwconv_up16x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)144 static void f32_dwconv_up16x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
145 DWConvEnd2EndBenchmark(state, model,
146 xnn_f32_dwconv_minmax_ukernel_up16x9__neon_acc2,
147 nullptr /* dwconv */,
148 xnn_init_f32_minmax_scalar_params,
149 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
150 }
151
f32_dwconv_up4x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)152 static void f32_dwconv_up4x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
153 DWConvEnd2EndBenchmark(state, model,
154 xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma,
155 nullptr /* dwconv */,
156 xnn_init_f32_minmax_scalar_params,
157 4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
158 }
159
f32_dwconv_up4x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)160 static void f32_dwconv_up4x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
161 DWConvEnd2EndBenchmark(state, model,
162 xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2,
163 nullptr /* dwconv */,
164 xnn_init_f32_minmax_scalar_params,
165 4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
166 }
167
f32_dwconv_up8x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)168 static void f32_dwconv_up8x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
169 DWConvEnd2EndBenchmark(state, model,
170 xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma,
171 nullptr /* dwconv */,
172 xnn_init_f32_minmax_scalar_params,
173 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
174 }
175
f32_dwconv_up8x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)176 static void f32_dwconv_up8x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
177 DWConvEnd2EndBenchmark(state, model,
178 xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2,
179 nullptr /* dwconv */,
180 xnn_init_f32_minmax_scalar_params,
181 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
182 }
183
f32_dwconv_up16x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)184 static void f32_dwconv_up16x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
185 DWConvEnd2EndBenchmark(state, model,
186 xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma,
187 nullptr /* dwconv */,
188 xnn_init_f32_minmax_scalar_params,
189 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
190 }
191
f32_dwconv_up16x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)192 static void f32_dwconv_up16x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
193 DWConvEnd2EndBenchmark(state, model,
194 xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma_acc2,
195 nullptr /* dwconv */,
196 xnn_init_f32_minmax_scalar_params,
197 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
198 }
199
// Hand the ARM NEON wrappers to BENCHMARK_FP32_END2END: the NEON FMA variants are
// listed first, then the plain NEON variants.
// NOTE(review): the macro is not defined in this file (presumably bench/end2end.h) —
// confirm which models it registers each wrapper against.
200 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma);
201 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma_acc2);
202 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma);
203 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma_acc2);
204 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neonfma);
205 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neonfma_acc2);
206
207 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon);
208 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon_acc2);
209 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon);
210 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon_acc2);
211 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neon);
212 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neon_acc2);
213 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
214
215
216 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
f32_dwconv_up4x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)217 static void f32_dwconv_up4x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
218 DWConvEnd2EndBenchmark(state, model,
219 xnn_f32_dwconv_minmax_ukernel_up4x9__sse,
220 nullptr /* dwconv */,
221 xnn_init_f32_minmax_sse_params,
222 4 /* channel tile */, 9 /* primary tile */);
223 }
f32_dwconv_up4x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)224 static void f32_dwconv_up4x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
225 DWConvEnd2EndBenchmark(state, model,
226 xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2,
227 nullptr /* dwconv */,
228 xnn_init_f32_minmax_sse_params,
229 4 /* channel tile */, 9 /* primary tile */);
230 }
f32_dwconv_up8x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)231 static void f32_dwconv_up8x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
232 DWConvEnd2EndBenchmark(state, model,
233 xnn_f32_dwconv_minmax_ukernel_up8x9__sse,
234 nullptr /* dwconv */,
235 xnn_init_f32_minmax_sse_params,
236 8 /* channel tile */, 9 /* primary tile */);
237 }
f32_dwconv_up8x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)238 static void f32_dwconv_up8x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
239 DWConvEnd2EndBenchmark(state, model,
240 xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2,
241 nullptr /* dwconv */,
242 xnn_init_f32_minmax_sse_params,
243 8 /* channel tile */, 9 /* primary tile */);
244 }
245
f32_dwconv_up8x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)246 static void f32_dwconv_up8x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
247 DWConvEnd2EndBenchmark(state, model,
248 xnn_f32_dwconv_minmax_ukernel_up8x9__avx,
249 nullptr /* dwconv */,
250 xnn_init_f32_minmax_avx_params,
251 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
252 }
f32_dwconv_up8x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)253 static void f32_dwconv_up8x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
254 DWConvEnd2EndBenchmark(state, model,
255 xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2,
256 nullptr /* dwconv */,
257 xnn_init_f32_minmax_avx_params,
258 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
259 }
f32_dwconv_up16x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)260 static void f32_dwconv_up16x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
261 DWConvEnd2EndBenchmark(state, model,
262 xnn_f32_dwconv_minmax_ukernel_up16x9__avx,
263 nullptr /* dwconv */,
264 xnn_init_f32_minmax_avx_params,
265 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
266 }
f32_dwconv_up16x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)267 static void f32_dwconv_up16x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
268 DWConvEnd2EndBenchmark(state, model,
269 xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2,
270 nullptr /* dwconv */,
271 xnn_init_f32_minmax_avx_params,
272 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
273 }
274
f32_dwconv_up8x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)275 static void f32_dwconv_up8x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
276 DWConvEnd2EndBenchmark(state, model,
277 xnn_f32_dwconv_minmax_ukernel_up8x9__fma3,
278 nullptr /* dwconv */,
279 xnn_init_f32_minmax_avx_params,
280 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
281 }
f32_dwconv_up8x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)282 static void f32_dwconv_up8x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
283 DWConvEnd2EndBenchmark(state, model,
284 xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2,
285 nullptr /* dwconv */,
286 xnn_init_f32_minmax_avx_params,
287 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
288 }
f32_dwconv_up16x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)289 static void f32_dwconv_up16x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
290 DWConvEnd2EndBenchmark(state, model,
291 xnn_f32_dwconv_minmax_ukernel_up16x9__fma3,
292 nullptr /* dwconv */,
293 xnn_init_f32_minmax_avx_params,
294 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
295 }
f32_dwconv_up16x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)296 static void f32_dwconv_up16x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
297 DWConvEnd2EndBenchmark(state, model,
298 xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2,
299 nullptr /* dwconv */,
300 xnn_init_f32_minmax_avx_params,
301 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
302 }
303
f32_dwconv_up16x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)304 static void f32_dwconv_up16x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
305 DWConvEnd2EndBenchmark(state, model,
306 xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f,
307 nullptr /* dwconv */,
308 xnn_init_f32_minmax_scalar_params,
309 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
310 }
f32_dwconv_up16x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)311 static void f32_dwconv_up16x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
312 DWConvEnd2EndBenchmark(state, model,
313 xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2,
314 nullptr /* dwconv */,
315 xnn_init_f32_minmax_scalar_params,
316 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
317 }
f32_dwconv_up32x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)318 static void f32_dwconv_up32x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
319 DWConvEnd2EndBenchmark(state, model,
320 xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f,
321 nullptr /* dwconv */,
322 xnn_init_f32_minmax_scalar_params,
323 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
324 }
f32_dwconv_up32x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)325 static void f32_dwconv_up32x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
326 DWConvEnd2EndBenchmark(state, model,
327 xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2,
328 nullptr /* dwconv */,
329 xnn_init_f32_minmax_scalar_params,
330 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
331 }
332
// Hand the x86 wrappers to BENCHMARK_FP32_END2END, grouped by instruction set in the
// order AVX512F, FMA3, AVX, SSE.
// NOTE(review): the macro is not defined in this file (presumably bench/end2end.h) —
// confirm which models it registers each wrapper against.
333 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f);
334 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f_acc2);
335 BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f);
336 BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f_acc2);
337
338 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3);
339 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3_acc2);
340 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3);
341 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3_acc2);
342
343 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx);
344 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx_acc2);
345 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx);
346 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx_acc2);
347
348 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse);
349 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse_acc2);
350 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse);
351 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse_acc2);
352 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
353
354 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
f32_dwconv_up4x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)355 static void f32_dwconv_up4x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
356 DWConvEnd2EndBenchmark(state, model,
357 xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm,
358 xnn_f32_dwconv_ukernel_up4x9__wasmsimd,
359 xnn_init_f32_minmax_scalar_params,
360 4 /* channel tile */, 9 /* primary tile */);
361 }
362
f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)363 static void f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
364 DWConvEnd2EndBenchmark(state, model,
365 xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2,
366 xnn_f32_dwconv_ukernel_up4x9__wasmsimd_acc2,
367 xnn_init_f32_minmax_scalar_params,
368 4 /* channel tile */, 9 /* primary tile */);
369 }
370
f32_dwconv_up8x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)371 static void f32_dwconv_up8x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
372 DWConvEnd2EndBenchmark(state, model,
373 xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm,
374 xnn_f32_dwconv_ukernel_up8x9__wasmsimd,
375 xnn_init_f32_minmax_scalar_params,
376 8 /* channel tile */, 9 /* primary tile */);
377 }
378
f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)379 static void f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
380 DWConvEnd2EndBenchmark(state, model,
381 xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2,
382 xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2,
383 xnn_init_f32_minmax_scalar_params,
384 8 /* channel tile */, 9 /* primary tile */);
385 }
386
f32_dwconv_up4x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)387 static void f32_dwconv_up4x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
388 DWConvEnd2EndBenchmark(state, model,
389 xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86,
390 xnn_f32_dwconv_ukernel_up4x9__wasmsimd,
391 xnn_init_f32_minmax_scalar_params,
392 4 /* channel tile */, 9 /* primary tile */);
393 }
394
f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)395 static void f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
396 DWConvEnd2EndBenchmark(state, model,
397 xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2,
398 xnn_f32_dwconv_ukernel_up4x9__wasmsimd_acc2,
399 xnn_init_f32_minmax_scalar_params,
400 4 /* channel tile */, 9 /* primary tile */);
401 }
402
f32_dwconv_up8x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)403 static void f32_dwconv_up8x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
404 DWConvEnd2EndBenchmark(state, model,
405 xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86,
406 xnn_f32_dwconv_ukernel_up8x9__wasmsimd,
407 xnn_init_f32_minmax_scalar_params,
408 8 /* channel tile */, 9 /* primary tile */);
409 }
410
f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)411 static void f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
412 DWConvEnd2EndBenchmark(state, model,
413 xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2,
414 xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2,
415 xnn_init_f32_minmax_scalar_params,
416 8 /* channel tile */, 9 /* primary tile */);
417 }
418
// Hand the WebAssembly SIMD wrappers to BENCHMARK_FP32_END2END: the "_arm" variants
// are listed first, then the "_x86" variants.
// NOTE(review): the macro is not defined in this file (presumably bench/end2end.h) —
// confirm which models it registers each wrapper against.
419 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm);
420 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm_acc2);
421 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm);
422 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm_acc2);
423
424 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86);
425 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86_acc2);
426 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86);
427 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86_acc2);
428 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
429
f32_dwconv_up1x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)430 static void f32_dwconv_up1x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
431 DWConvEnd2EndBenchmark(state, model,
432 xnn_f32_dwconv_minmax_ukernel_up1x9__scalar,
433 xnn_f32_dwconv_ukernel_up1x9__scalar,
434 xnn_init_f32_minmax_scalar_params,
435 1 /* channel tile */, 9 /* primary tile */);
436 }
437
f32_dwconv_up1x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)438 static void f32_dwconv_up1x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
439 DWConvEnd2EndBenchmark(state, model,
440 xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2,
441 xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
442 xnn_init_f32_minmax_scalar_params,
443 1 /* channel tile */, 9 /* primary tile */);
444 }
445
f32_dwconv_up2x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)446 static void f32_dwconv_up2x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
447 DWConvEnd2EndBenchmark(state, model,
448 xnn_f32_dwconv_minmax_ukernel_up2x9__scalar,
449 xnn_f32_dwconv_ukernel_up2x9__scalar,
450 xnn_init_f32_minmax_scalar_params,
451 2 /* channel tile */, 9 /* primary tile */);
452 }
453
f32_dwconv_up2x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)454 static void f32_dwconv_up2x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
455 DWConvEnd2EndBenchmark(state, model,
456 xnn_f32_dwconv_minmax_ukernel_up2x9__scalar_acc2,
457 xnn_f32_dwconv_ukernel_up2x9__scalar_acc2,
458 xnn_init_f32_minmax_scalar_params,
459 2 /* channel tile */, 9 /* primary tile */);
460 }
461
// Hand the scalar wrappers to BENCHMARK_FP32_END2END; these sit outside every
// architecture #if block in this file.
// NOTE(review): the macro is not defined in this file (presumably bench/end2end.h) —
// confirm which models it registers each wrapper against.
462 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar);
463 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar_acc2);
464 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar);
465 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar_acc2);
466
// Expand BENCHMARK_MAIN() unless XNNPACK_BENCHMARK_NO_MAIN is defined.
// NOTE(review): presumably the define is set when another translation unit supplies
// main() — confirm against the build configuration.
467 #ifndef XNNPACK_BENCHMARK_NO_MAIN
468 BENCHMARK_MAIN();
469 #endif
470