xref: /aosp_15_r20/external/XNNPACK/bench/f32-dwconv-e2e.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <cstring>
9 #include <functional>
10 #include <random>
11 #include <vector>
12 
13 #include <xnnpack.h>
14 
15 #include <benchmark/benchmark.h>
16 #include "bench/end2end.h"
17 #include "bench/utils.h"
18 #include "models/models.h"
19 
20 #include <xnnpack.h>
21 #include <xnnpack/dwconv.h>
22 #include <xnnpack/microfnptr.h>
23 #include <xnnpack/microparams-init.h>
24 
25 
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv_minmax,xnn_f32_dwconv_unipass_ukernel_function dwconv,xnn_init_f32_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)26 static void DWConvEnd2EndBenchmark(
27   benchmark::State& state,
28   models::ExecutionPlanFactory model_factory,
29   xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv_minmax,
30   xnn_f32_dwconv_unipass_ukernel_function dwconv,
31   xnn_init_f32_minmax_params_fn init_params,
32   uint8_t channel_tile, uint8_t primary_tile,
33   benchmark::utils::IsaCheckFunction isa_check = nullptr)
34 {
35   if (isa_check && !isa_check(state)) {
36     return;
37   }
38   if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
39     state.SkipWithError("failed to initialize XNNPACK");
40     return;
41   }
42 
43   // Override microkernels chosen in xnn_initialize
44   for (size_t i = 0; i < XNN_MAX_F32_DWCONV_UKERNELS; i++) {
45     // Replace only the microkernel with the matching kernel size.
46     if (xnn_params.f32.dwconv[i].primary_tile == primary_tile) {
47       std::memset(&xnn_params.f32.dwconv[i], 0, sizeof(xnn_params.f32.dwconv[i]));
48 
49       // Note: do not directly assign to xnn_params.f32.dwconv[i] because it breaks older gcc.
50       xnn_params.f32.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv_minmax);
51       xnn_params.f32.dwconv[i].linear.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
52       xnn_params.f32.dwconv[i].channel_tile = channel_tile;
53       xnn_params.f32.dwconv[i].primary_tile = primary_tile;
54       xnn_params.f32.dwconv[i].incremental_tile = 0;
55       xnn_params.f32.dwconv[i].init.f32 = init_params;
56       break;
57     }
58   }
59 
60   auto execution_plan = model_factory(nullptr);
61   if (execution_plan.empty()) {
62     state.SkipWithError("failed to create a model");
63     return;
64   }
65 
66   for (auto _ : state) {
67     for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
68       xnn_status status = xnn_run_operator(op.get(), nullptr);
69       if (status != xnn_status_success) {
70         state.SkipWithError("failed to run a model");
71         return;
72       }
73     }
74   }
75 
76   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
77   if (cpu_frequency != 0) {
78     state.counters["cpufreq"] = cpu_frequency;
79   }
80 }
81 
82 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
f32_dwconv_up4x9__aarch64_neonfma(benchmark::State & state,models::ExecutionPlanFactory model)83   static void f32_dwconv_up4x9__aarch64_neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
84     DWConvEnd2EndBenchmark(state, model,
85       xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma,
86       nullptr /* dwconv */,
87       xnn_init_f32_minmax_scalar_params,
88       4 /* channel tile */, 9 /* primary tile */);
89   }
90 
f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State & state,models::ExecutionPlanFactory model)91   static void f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
92     DWConvEnd2EndBenchmark(state, model,
93       xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55,
94       nullptr /* dwconv */,
95       xnn_init_f32_minmax_scalar_params,
96       4 /* channel tile */, 9 /* primary tile */);
97   }
98 
99   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma);
100   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55);
101 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
102 
103 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
f32_dwconv_up4x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)104   static void f32_dwconv_up4x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
105     DWConvEnd2EndBenchmark(state, model,
106       xnn_f32_dwconv_minmax_ukernel_up4x9__neon,
107       nullptr /* dwconv */,
108       xnn_init_f32_minmax_scalar_params,
109       4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
110   }
111 
f32_dwconv_up4x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)112   static void f32_dwconv_up4x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
113     DWConvEnd2EndBenchmark(state, model,
114       xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2,
115       nullptr /* dwconv */,
116       xnn_init_f32_minmax_scalar_params,
117       4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
118   }
119 
f32_dwconv_up8x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)120   static void f32_dwconv_up8x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
121     DWConvEnd2EndBenchmark(state, model,
122       xnn_f32_dwconv_minmax_ukernel_up8x9__neon,
123       nullptr /* dwconv */,
124       xnn_init_f32_minmax_scalar_params,
125       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
126   }
127 
f32_dwconv_up8x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)128   static void f32_dwconv_up8x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
129     DWConvEnd2EndBenchmark(state, model,
130       xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2,
131       nullptr /* dwconv */,
132       xnn_init_f32_minmax_scalar_params,
133       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
134   }
135 
f32_dwconv_up16x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)136   static void f32_dwconv_up16x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
137     DWConvEnd2EndBenchmark(state, model,
138       xnn_f32_dwconv_minmax_ukernel_up16x9__neon,
139       nullptr /* dwconv */,
140       xnn_init_f32_minmax_scalar_params,
141       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
142   }
143 
f32_dwconv_up16x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)144   static void f32_dwconv_up16x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
145     DWConvEnd2EndBenchmark(state, model,
146       xnn_f32_dwconv_minmax_ukernel_up16x9__neon_acc2,
147       nullptr /* dwconv */,
148       xnn_init_f32_minmax_scalar_params,
149       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
150   }
151 
f32_dwconv_up4x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)152   static void f32_dwconv_up4x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
153     DWConvEnd2EndBenchmark(state, model,
154       xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma,
155       nullptr /* dwconv */,
156       xnn_init_f32_minmax_scalar_params,
157       4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
158   }
159 
f32_dwconv_up4x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)160   static void f32_dwconv_up4x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
161     DWConvEnd2EndBenchmark(state, model,
162       xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2,
163       nullptr /* dwconv */,
164       xnn_init_f32_minmax_scalar_params,
165       4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
166   }
167 
f32_dwconv_up8x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)168   static void f32_dwconv_up8x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
169     DWConvEnd2EndBenchmark(state, model,
170       xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma,
171       nullptr /* dwconv */,
172       xnn_init_f32_minmax_scalar_params,
173       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
174   }
175 
f32_dwconv_up8x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)176   static void f32_dwconv_up8x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
177     DWConvEnd2EndBenchmark(state, model,
178       xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2,
179       nullptr /* dwconv */,
180       xnn_init_f32_minmax_scalar_params,
181       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
182   }
183 
f32_dwconv_up16x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)184   static void f32_dwconv_up16x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
185     DWConvEnd2EndBenchmark(state, model,
186       xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma,
187       nullptr /* dwconv */,
188       xnn_init_f32_minmax_scalar_params,
189       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
190   }
191 
f32_dwconv_up16x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)192   static void f32_dwconv_up16x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
193     DWConvEnd2EndBenchmark(state, model,
194       xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma_acc2,
195       nullptr /* dwconv */,
196       xnn_init_f32_minmax_scalar_params,
197       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
198   }
199 
200   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma);
201   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma_acc2);
202   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma);
203   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma_acc2);
204   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neonfma);
205   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neonfma_acc2);
206 
207   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon);
208   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon_acc2);
209   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon);
210   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon_acc2);
211   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neon);
212   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neon_acc2);
213 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
214 
215 
216 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
f32_dwconv_up4x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)217   static void f32_dwconv_up4x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
218     DWConvEnd2EndBenchmark(state, model,
219       xnn_f32_dwconv_minmax_ukernel_up4x9__sse,
220       nullptr /* dwconv */,
221       xnn_init_f32_minmax_sse_params,
222       4 /* channel tile */, 9 /* primary tile */);
223   }
f32_dwconv_up4x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)224   static void f32_dwconv_up4x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
225     DWConvEnd2EndBenchmark(state, model,
226       xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2,
227       nullptr /* dwconv */,
228       xnn_init_f32_minmax_sse_params,
229       4 /* channel tile */, 9 /* primary tile */);
230   }
f32_dwconv_up8x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)231   static void f32_dwconv_up8x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
232     DWConvEnd2EndBenchmark(state, model,
233       xnn_f32_dwconv_minmax_ukernel_up8x9__sse,
234       nullptr /* dwconv */,
235       xnn_init_f32_minmax_sse_params,
236       8 /* channel tile */, 9 /* primary tile */);
237   }
f32_dwconv_up8x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)238   static void f32_dwconv_up8x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
239     DWConvEnd2EndBenchmark(state, model,
240       xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2,
241       nullptr /* dwconv */,
242       xnn_init_f32_minmax_sse_params,
243       8 /* channel tile */, 9 /* primary tile */);
244   }
245 
f32_dwconv_up8x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)246   static void f32_dwconv_up8x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
247     DWConvEnd2EndBenchmark(state, model,
248       xnn_f32_dwconv_minmax_ukernel_up8x9__avx,
249       nullptr /* dwconv */,
250       xnn_init_f32_minmax_avx_params,
251       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
252   }
f32_dwconv_up8x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)253   static void f32_dwconv_up8x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
254     DWConvEnd2EndBenchmark(state, model,
255       xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2,
256       nullptr /* dwconv */,
257       xnn_init_f32_minmax_avx_params,
258       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
259   }
f32_dwconv_up16x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)260   static void f32_dwconv_up16x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
261     DWConvEnd2EndBenchmark(state, model,
262       xnn_f32_dwconv_minmax_ukernel_up16x9__avx,
263       nullptr /* dwconv */,
264       xnn_init_f32_minmax_avx_params,
265       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
266   }
f32_dwconv_up16x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)267   static void f32_dwconv_up16x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
268     DWConvEnd2EndBenchmark(state, model,
269       xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2,
270       nullptr /* dwconv */,
271       xnn_init_f32_minmax_avx_params,
272       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
273   }
274 
f32_dwconv_up8x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)275   static void f32_dwconv_up8x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
276     DWConvEnd2EndBenchmark(state, model,
277       xnn_f32_dwconv_minmax_ukernel_up8x9__fma3,
278       nullptr /* dwconv */,
279       xnn_init_f32_minmax_avx_params,
280       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
281   }
f32_dwconv_up8x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)282   static void f32_dwconv_up8x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
283     DWConvEnd2EndBenchmark(state, model,
284       xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2,
285       nullptr /* dwconv */,
286       xnn_init_f32_minmax_avx_params,
287       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
288   }
f32_dwconv_up16x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)289   static void f32_dwconv_up16x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
290     DWConvEnd2EndBenchmark(state, model,
291       xnn_f32_dwconv_minmax_ukernel_up16x9__fma3,
292       nullptr /* dwconv */,
293       xnn_init_f32_minmax_avx_params,
294       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
295   }
f32_dwconv_up16x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)296   static void f32_dwconv_up16x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
297     DWConvEnd2EndBenchmark(state, model,
298       xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2,
299       nullptr /* dwconv */,
300       xnn_init_f32_minmax_avx_params,
301       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
302   }
303 
f32_dwconv_up16x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)304   static void f32_dwconv_up16x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
305     DWConvEnd2EndBenchmark(state, model,
306       xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f,
307       nullptr /* dwconv */,
308       xnn_init_f32_minmax_scalar_params,
309       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
310   }
f32_dwconv_up16x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)311   static void f32_dwconv_up16x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
312     DWConvEnd2EndBenchmark(state, model,
313       xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2,
314       nullptr /* dwconv */,
315       xnn_init_f32_minmax_scalar_params,
316       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
317   }
f32_dwconv_up32x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)318   static void f32_dwconv_up32x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
319     DWConvEnd2EndBenchmark(state, model,
320       xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f,
321       nullptr /* dwconv */,
322       xnn_init_f32_minmax_scalar_params,
323       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
324   }
f32_dwconv_up32x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)325   static void f32_dwconv_up32x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
326     DWConvEnd2EndBenchmark(state, model,
327       xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2,
328       nullptr /* dwconv */,
329       xnn_init_f32_minmax_scalar_params,
330       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
331   }
332 
333   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f);
334   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f_acc2);
335   BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f);
336   BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f_acc2);
337 
338   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3);
339   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3_acc2);
340   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3);
341   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3_acc2);
342 
343   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx);
344   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx_acc2);
345   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx);
346   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx_acc2);
347 
348   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse);
349   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse_acc2);
350   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse);
351   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse_acc2);
352 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
353 
354 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
f32_dwconv_up4x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)355   static void f32_dwconv_up4x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
356     DWConvEnd2EndBenchmark(state, model,
357       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm,
358       xnn_f32_dwconv_ukernel_up4x9__wasmsimd,
359       xnn_init_f32_minmax_scalar_params,
360       4 /* channel tile */, 9 /* primary tile */);
361   }
362 
f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)363   static void f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
364     DWConvEnd2EndBenchmark(state, model,
365       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2,
366       xnn_f32_dwconv_ukernel_up4x9__wasmsimd_acc2,
367       xnn_init_f32_minmax_scalar_params,
368       4 /* channel tile */, 9 /* primary tile */);
369   }
370 
f32_dwconv_up8x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)371   static void f32_dwconv_up8x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
372     DWConvEnd2EndBenchmark(state, model,
373       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm,
374       xnn_f32_dwconv_ukernel_up8x9__wasmsimd,
375       xnn_init_f32_minmax_scalar_params,
376       8 /* channel tile */, 9 /* primary tile */);
377   }
378 
f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)379   static void f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
380     DWConvEnd2EndBenchmark(state, model,
381       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2,
382       xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2,
383       xnn_init_f32_minmax_scalar_params,
384       8 /* channel tile */, 9 /* primary tile */);
385   }
386 
f32_dwconv_up4x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)387   static void f32_dwconv_up4x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
388     DWConvEnd2EndBenchmark(state, model,
389       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86,
390       xnn_f32_dwconv_ukernel_up4x9__wasmsimd,
391       xnn_init_f32_minmax_scalar_params,
392       4 /* channel tile */, 9 /* primary tile */);
393   }
394 
f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)395   static void f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
396     DWConvEnd2EndBenchmark(state, model,
397       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2,
398       xnn_f32_dwconv_ukernel_up4x9__wasmsimd_acc2,
399       xnn_init_f32_minmax_scalar_params,
400       4 /* channel tile */, 9 /* primary tile */);
401   }
402 
f32_dwconv_up8x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)403   static void f32_dwconv_up8x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
404     DWConvEnd2EndBenchmark(state, model,
405       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86,
406       xnn_f32_dwconv_ukernel_up8x9__wasmsimd,
407       xnn_init_f32_minmax_scalar_params,
408       8 /* channel tile */, 9 /* primary tile */);
409   }
410 
f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)411   static void f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
412     DWConvEnd2EndBenchmark(state, model,
413       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2,
414       xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2,
415       xnn_init_f32_minmax_scalar_params,
416       8 /* channel tile */, 9 /* primary tile */);
417   }
418 
419   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm);
420   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm_acc2);
421   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm);
422   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm_acc2);
423 
424   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86);
425   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86_acc2);
426   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86);
427   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86_acc2);
428 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
429 
f32_dwconv_up1x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)430 static void f32_dwconv_up1x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
431   DWConvEnd2EndBenchmark(state, model,
432     xnn_f32_dwconv_minmax_ukernel_up1x9__scalar,
433     xnn_f32_dwconv_ukernel_up1x9__scalar,
434     xnn_init_f32_minmax_scalar_params,
435     1 /* channel tile */, 9 /* primary tile */);
436 }
437 
f32_dwconv_up1x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)438 static void f32_dwconv_up1x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
439   DWConvEnd2EndBenchmark(state, model,
440     xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2,
441     xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
442     xnn_init_f32_minmax_scalar_params,
443     1 /* channel tile */, 9 /* primary tile */);
444 }
445 
f32_dwconv_up2x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)446 static void f32_dwconv_up2x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
447   DWConvEnd2EndBenchmark(state, model,
448     xnn_f32_dwconv_minmax_ukernel_up2x9__scalar,
449     xnn_f32_dwconv_ukernel_up2x9__scalar,
450     xnn_init_f32_minmax_scalar_params,
451     2 /* channel tile */, 9 /* primary tile */);
452 }
453 
f32_dwconv_up2x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)454 static void f32_dwconv_up2x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
455   DWConvEnd2EndBenchmark(state, model,
456     xnn_f32_dwconv_minmax_ukernel_up2x9__scalar_acc2,
457     xnn_f32_dwconv_ukernel_up2x9__scalar_acc2,
458     xnn_init_f32_minmax_scalar_params,
459     2 /* channel tile */, 9 /* primary tile */);
460 }
461 
462 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar);
463 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar_acc2);
464 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar);
465 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar_acc2);
466 
467 #ifndef XNNPACK_BENCHMARK_NO_MAIN
468 BENCHMARK_MAIN();
469 #endif
470