xref: /aosp_15_r20/external/XNNPACK/bench/qu8-dwconv-e2e.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11 
12 #include <xnnpack.h>
13 
14 #include <benchmark/benchmark.h>
15 #include "bench/end2end.h"
16 #include "bench/utils.h"
17 #include "models/models.h"
18 
19 #include <xnnpack.h>
20 #include <xnnpack/dwconv.h>
21 #include <xnnpack/microfnptr.h>
22 #include <xnnpack/microparams-init.h>
23 
24 
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,xnn_init_qu8_conv_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)25 static void DWConvEnd2EndBenchmark(
26   benchmark::State& state,
27   models::ExecutionPlanFactory model_factory,
28   xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,
29   xnn_init_qu8_conv_minmax_params_fn init_params,
30   uint8_t channel_tile, uint8_t primary_tile,
31   benchmark::utils::IsaCheckFunction isa_check = nullptr)
32 {
33   if (isa_check && !isa_check(state)) {
34     return;
35   }
36   if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
37     state.SkipWithError("failed to initialize XNNPACK");
38     return;
39   }
40 
41   // Override microkernels chosen in xnn_initialize
42   for (size_t i = 0; i < XNN_MAX_QU8_DWCONV_UKERNELS; i++) {
43     // Replace only the microkernel the matching kernel size.
44     if (xnn_params.qu8.dwconv[i].primary_tile == primary_tile) {
45       // Note: do not directly assign to xnn_params.qu8.dwconv[i] because it breaks older gcc.
46       xnn_params.qu8.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
47       xnn_params.qu8.dwconv[i].channel_tile = channel_tile;
48       xnn_params.qu8.dwconv[i].primary_tile = primary_tile;
49       xnn_params.qu8.dwconv[i].incremental_tile = 0;
50       xnn_params.qu8.dwconv[i].init.qu8 = init_params;
51       break;
52     }
53   }
54 
55   auto execution_plan = model_factory(nullptr);
56   if (execution_plan.empty()) {
57     state.SkipWithError("failed to create a model");
58     return;
59   }
60 
61   for (auto _ : state) {
62     for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
63       xnn_status status = xnn_run_operator(op.get(), nullptr);
64       if (status != xnn_status_success) {
65         state.SkipWithError("failed to run a model");
66         return;
67       }
68     }
69   }
70 
71   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
72   if (cpu_frequency != 0) {
73     state.counters["cpufreq"] = cpu_frequency;
74   }
75 }
76 
77 
78 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
qu8_dwconv_up8x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)79   static void qu8_dwconv_up8x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
80     DWConvEnd2EndBenchmark(state, model,
81       xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
82       xnn_init_qu8_conv_minmax_rndnu_neon_params,
83       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
84   }
qu8_dwconv_up16x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)85   static void qu8_dwconv_up16x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
86     DWConvEnd2EndBenchmark(state, model,
87       xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8,
88       xnn_init_qu8_conv_minmax_rndnu_neon_params,
89       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
90   }
qu8_dwconv_up24x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)91   static void qu8_dwconv_up24x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
92     DWConvEnd2EndBenchmark(state, model,
93       xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
94       xnn_init_qu8_conv_minmax_rndnu_neon_params,
95       24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
96   }
qu8_dwconv_up32x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)97   static void qu8_dwconv_up32x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
98     DWConvEnd2EndBenchmark(state, model,
99       xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
100       xnn_init_qu8_conv_minmax_rndnu_neon_params,
101       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
102   }
qu8_dwconv_up8x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)103   static void qu8_dwconv_up8x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
104     DWConvEnd2EndBenchmark(state, model,
105       xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
106       xnn_init_qu8_conv_minmax_rndnu_neon_params,
107       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
108   }
qu8_dwconv_up16x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)109   static void qu8_dwconv_up16x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
110     DWConvEnd2EndBenchmark(state, model,
111       xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
112       xnn_init_qu8_conv_minmax_rndnu_neon_params,
113       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
114   }
qu8_dwconv_up24x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)115   static void qu8_dwconv_up24x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
116     DWConvEnd2EndBenchmark(state, model,
117       xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
118       xnn_init_qu8_conv_minmax_rndnu_neon_params,
119       24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
120   }
qu8_dwconv_up32x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)121   static void qu8_dwconv_up32x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
122     DWConvEnd2EndBenchmark(state, model,
123       xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
124       xnn_init_qu8_conv_minmax_rndnu_neon_params,
125       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
126   }
127 
128   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul8);
129   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul8);
130   BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul8);
131   BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul8);
132   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul16);
133   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul16);
134   BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul16);
135   BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul16);
136 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
137 
138 
139 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)140   static void qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
141     DWConvEnd2EndBenchmark(state, model,
142       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
143       xnn_init_qu8_conv_minmax_fp32_avx512_params,
144       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
145   }
qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)146   static void qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
147     DWConvEnd2EndBenchmark(state, model,
148       xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
149       xnn_init_qu8_conv_minmax_fp32_avx512_params,
150       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
151   }
qu8_dwconv_up8x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)152   static void qu8_dwconv_up8x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
153     DWConvEnd2EndBenchmark(state, model,
154       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
155       xnn_init_qu8_conv_minmax_fp32_avx2_params,
156       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
157   }
qu8_dwconv_up16x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)158   static void qu8_dwconv_up16x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
159     DWConvEnd2EndBenchmark(state, model,
160       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
161       xnn_init_qu8_conv_minmax_fp32_avx2_params,
162       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
163   }
qu8_dwconv_up32x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)164   static void qu8_dwconv_up32x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
165     DWConvEnd2EndBenchmark(state, model,
166       xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
167       xnn_init_qu8_conv_minmax_fp32_avx2_params,
168       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
169   }
qu8_dwconv_up8x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)170   static void qu8_dwconv_up8x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
171     DWConvEnd2EndBenchmark(state, model,
172       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
173       xnn_init_qu8_conv_minmax_fp32_sse2_params,
174       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
175   }
qu8_dwconv_up16x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)176   static void qu8_dwconv_up16x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
177     DWConvEnd2EndBenchmark(state, model,
178       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
179       xnn_init_qu8_conv_minmax_fp32_sse2_params,
180       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
181   }
qu8_dwconv_up8x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)182   static void qu8_dwconv_up8x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
183     DWConvEnd2EndBenchmark(state, model,
184       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
185       xnn_init_qu8_conv_minmax_fp32_sse2_params,
186       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
187   }
qu8_dwconv_up16x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)188   static void qu8_dwconv_up16x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
189     DWConvEnd2EndBenchmark(state, model,
190       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
191       xnn_init_qu8_conv_minmax_fp32_sse2_params,
192       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
193   }
qu8_dwconv_up8x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)194   static void qu8_dwconv_up8x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
195     DWConvEnd2EndBenchmark(state, model,
196       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
197       xnn_init_qu8_conv_minmax_fp32_sse2_params,
198       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
199   }
qu8_dwconv_up16x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)200   static void qu8_dwconv_up16x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
201     DWConvEnd2EndBenchmark(state, model,
202       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
203       xnn_init_qu8_conv_minmax_fp32_sse2_params,
204       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
205   }
qu8_dwconv_up8x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)206   static void qu8_dwconv_up8x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
207     DWConvEnd2EndBenchmark(state, model,
208       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
209       xnn_init_qu8_conv_minmax_fp32_sse2_params,
210       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
211   }
qu8_dwconv_up16x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)212   static void qu8_dwconv_up16x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
213     DWConvEnd2EndBenchmark(state, model,
214       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
215       xnn_init_qu8_conv_minmax_fp32_sse2_params,
216       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
217   }
qu8_dwconv_up8x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)218   static void qu8_dwconv_up8x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
219     DWConvEnd2EndBenchmark(state, model,
220       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
221       xnn_init_qu8_conv_minmax_fp32_sse2_params,
222       8 /* channel tile */, 9 /* primary tile */);
223   }
qu8_dwconv_up16x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)224   static void qu8_dwconv_up16x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
225     DWConvEnd2EndBenchmark(state, model,
226       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
227       xnn_init_qu8_conv_minmax_fp32_sse2_params,
228       16 /* channel tile */, 9 /* primary tile */);
229   }
230 
231   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx512skx_mul32);
232   BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx512skx_mul32);
233 
234   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx2_mul32);
235   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx2_mul32);
236   BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx2_mul32);
237 
238   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul16);
239   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul16);
240   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul32);
241   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul32);
242 
243   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul16);
244   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul16);
245   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul32);
246   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul32);
247 
248   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse2_mul16);
249   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse2_mul16);
250 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
251 
252 
253 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)254   static void qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
255     DWConvEnd2EndBenchmark(state, model,
256       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
257       xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
258       8 /* channel tile */, 9 /* primary tile */);
259   }
qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)260   static void qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
261     DWConvEnd2EndBenchmark(state, model,
262       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
263       xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
264       16 /* channel tile */, 9 /* primary tile */);
265   }
266 
267   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__wasmsimd_mul16);
268   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__wasmsimd_mul16);
269 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
270 
271 
272 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up1x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)273   static void qu8_dwconv_up1x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
274     DWConvEnd2EndBenchmark(state, model,
275       xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
276       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
277       1 /* channel tile */, 9 /* primary tile */);
278   }
qu8_dwconv_up2x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)279   static void qu8_dwconv_up2x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
280     DWConvEnd2EndBenchmark(state, model,
281       xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
282       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
283       2 /* channel tile */, 9 /* primary tile */);
284   }
qu8_dwconv_up4x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)285   static void qu8_dwconv_up4x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
286     DWConvEnd2EndBenchmark(state, model,
287       xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
288       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
289       4 /* channel tile */, 9 /* primary tile */);
290   }
291 
292   BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__wasm_fmagic);
293   BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__wasm_fmagic);
294   BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__wasm_fmagic);
295 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
296 
297 
qu8_dwconv_up1x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)298 static void qu8_dwconv_up1x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
299   DWConvEnd2EndBenchmark(state, model,
300     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
301     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
302     1 /* channel tile */, 9 /* primary tile */);
303 }
qu8_dwconv_up2x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)304 static void qu8_dwconv_up2x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
305   DWConvEnd2EndBenchmark(state, model,
306     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
307     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
308     2 /* channel tile */, 9 /* primary tile */);
309 }
qu8_dwconv_up4x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)310 static void qu8_dwconv_up4x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
311   DWConvEnd2EndBenchmark(state, model,
312     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
313     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
314     4 /* channel tile */, 9 /* primary tile */);
315 }
316 
qu8_dwconv_up1x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)317 static void qu8_dwconv_up1x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
318   DWConvEnd2EndBenchmark(state, model,
319     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
320     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
321     1 /* channel tile */, 9 /* primary tile */);
322 }
qu8_dwconv_up2x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)323 static void qu8_dwconv_up2x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
324   DWConvEnd2EndBenchmark(state, model,
325     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
326     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
327     2 /* channel tile */, 9 /* primary tile */);
328 }
qu8_dwconv_up4x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)329 static void qu8_dwconv_up4x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
330   DWConvEnd2EndBenchmark(state, model,
331     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
332     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
333     4 /* channel tile */, 9 /* primary tile */);
334 }
335 
qu8_dwconv_up1x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)336 static void qu8_dwconv_up1x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
337   DWConvEnd2EndBenchmark(state, model,
338     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
339     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
340     1 /* channel tile */, 9 /* primary tile */);
341 }
qu8_dwconv_up2x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)342 static void qu8_dwconv_up2x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
343   DWConvEnd2EndBenchmark(state, model,
344     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
345     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
346     2 /* channel tile */, 9 /* primary tile */);
347 }
qu8_dwconv_up4x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)348 static void qu8_dwconv_up4x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
349   DWConvEnd2EndBenchmark(state, model,
350     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
351     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
352     4 /* channel tile */, 9 /* primary tile */);
353 }
354 
355 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_fmagic);
356 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_fmagic);
357 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_fmagic);
358 
359 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_imagic);
360 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_imagic);
361 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_imagic);
362 
363 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_lrintf);
364 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_lrintf);
365 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_lrintf);
366 
367 
368 #ifndef XNNPACK_BENCHMARK_NO_MAIN
369 BENCHMARK_MAIN();
370 #endif
371