xref: /aosp_15_r20/external/XNNPACK/bench/qs8-dwconv-e2e.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker // Copyright 2021 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker //
3*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <algorithm>
7*4bdc9457SAndroid Build Coastguard Worker #include <cmath>
8*4bdc9457SAndroid Build Coastguard Worker #include <functional>
9*4bdc9457SAndroid Build Coastguard Worker #include <random>
10*4bdc9457SAndroid Build Coastguard Worker #include <vector>
11*4bdc9457SAndroid Build Coastguard Worker 
12*4bdc9457SAndroid Build Coastguard Worker #include <benchmark/benchmark.h>
13*4bdc9457SAndroid Build Coastguard Worker #include "bench/end2end.h"
14*4bdc9457SAndroid Build Coastguard Worker #include "bench/utils.h"
15*4bdc9457SAndroid Build Coastguard Worker #include "models/models.h"
16*4bdc9457SAndroid Build Coastguard Worker 
17*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack.h>
18*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/dwconv.h>
19*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microfnptr.h>
20*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microparams-init.h>
21*4bdc9457SAndroid Build Coastguard Worker 
22*4bdc9457SAndroid Build Coastguard Worker 
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_qs8_dwconv_minmax_unipass_ukernel_function dwconv,xnn_init_qs8_conv_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)23*4bdc9457SAndroid Build Coastguard Worker static void DWConvEnd2EndBenchmark(
24*4bdc9457SAndroid Build Coastguard Worker   benchmark::State& state,
25*4bdc9457SAndroid Build Coastguard Worker   models::ExecutionPlanFactory model_factory,
26*4bdc9457SAndroid Build Coastguard Worker   xnn_qs8_dwconv_minmax_unipass_ukernel_function dwconv,
27*4bdc9457SAndroid Build Coastguard Worker   xnn_init_qs8_conv_minmax_params_fn init_params,
28*4bdc9457SAndroid Build Coastguard Worker   uint8_t channel_tile, uint8_t primary_tile,
29*4bdc9457SAndroid Build Coastguard Worker   benchmark::utils::IsaCheckFunction isa_check = nullptr)
30*4bdc9457SAndroid Build Coastguard Worker {
31*4bdc9457SAndroid Build Coastguard Worker   if (isa_check && !isa_check(state)) {
32*4bdc9457SAndroid Build Coastguard Worker     return;
33*4bdc9457SAndroid Build Coastguard Worker   }
34*4bdc9457SAndroid Build Coastguard Worker   if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
35*4bdc9457SAndroid Build Coastguard Worker     state.SkipWithError("failed to initialize XNNPACK");
36*4bdc9457SAndroid Build Coastguard Worker     return;
37*4bdc9457SAndroid Build Coastguard Worker   }
38*4bdc9457SAndroid Build Coastguard Worker 
39*4bdc9457SAndroid Build Coastguard Worker   // Override microkernels chosen in xnn_initialize
40*4bdc9457SAndroid Build Coastguard Worker   for (size_t i = 0; i < XNN_MAX_QS8_DWCONV_UKERNELS; i++) {
41*4bdc9457SAndroid Build Coastguard Worker     // Replace only the microkernel the matching kernel size.
42*4bdc9457SAndroid Build Coastguard Worker     if (xnn_params.qs8.dwconv[i].primary_tile == primary_tile) {
43*4bdc9457SAndroid Build Coastguard Worker       // Note: do not directly assign to xnn_params.qs8.dwconv[i] because it breaks older gcc.
44*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
45*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[i].channel_tile = channel_tile;
46*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[i].primary_tile = primary_tile;
47*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[i].incremental_tile = 0;
48*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[i].init.qs8 = init_params;
49*4bdc9457SAndroid Build Coastguard Worker       break;
50*4bdc9457SAndroid Build Coastguard Worker     }
51*4bdc9457SAndroid Build Coastguard Worker   }
52*4bdc9457SAndroid Build Coastguard Worker 
53*4bdc9457SAndroid Build Coastguard Worker   auto execution_plan = model_factory(nullptr);
54*4bdc9457SAndroid Build Coastguard Worker   if (execution_plan.empty()) {
55*4bdc9457SAndroid Build Coastguard Worker     state.SkipWithError("failed to create a model");
56*4bdc9457SAndroid Build Coastguard Worker     return;
57*4bdc9457SAndroid Build Coastguard Worker   }
58*4bdc9457SAndroid Build Coastguard Worker 
59*4bdc9457SAndroid Build Coastguard Worker   for (auto _ : state) {
60*4bdc9457SAndroid Build Coastguard Worker     for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
61*4bdc9457SAndroid Build Coastguard Worker       xnn_status status = xnn_run_operator(op.get(), nullptr);
62*4bdc9457SAndroid Build Coastguard Worker       if (status != xnn_status_success) {
63*4bdc9457SAndroid Build Coastguard Worker         state.SkipWithError("failed to run a model");
64*4bdc9457SAndroid Build Coastguard Worker         return;
65*4bdc9457SAndroid Build Coastguard Worker       }
66*4bdc9457SAndroid Build Coastguard Worker     }
67*4bdc9457SAndroid Build Coastguard Worker   }
68*4bdc9457SAndroid Build Coastguard Worker 
69*4bdc9457SAndroid Build Coastguard Worker   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
70*4bdc9457SAndroid Build Coastguard Worker   if (cpu_frequency != 0) {
71*4bdc9457SAndroid Build Coastguard Worker     state.counters["cpufreq"] = cpu_frequency;
72*4bdc9457SAndroid Build Coastguard Worker   }
73*4bdc9457SAndroid Build Coastguard Worker }
74*4bdc9457SAndroid Build Coastguard Worker 
75*4bdc9457SAndroid Build Coastguard Worker 
76*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
qs8_dwconv_up8x9__neon_mul8_ld64(benchmark::State & state,models::ExecutionPlanFactory model)77*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__neon_mul8_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
78*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
79*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
80*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
81*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
82*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__neon_mul8_ld64(benchmark::State & state,models::ExecutionPlanFactory model)83*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__neon_mul8_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
84*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
85*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
86*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
87*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
88*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__neon_mul8_ld128(benchmark::State & state,models::ExecutionPlanFactory model)89*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__neon_mul8_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
90*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
91*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
92*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
93*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
94*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__neon_mla8_ld64(benchmark::State & state,models::ExecutionPlanFactory model)95*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__neon_mla8_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
96*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
97*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
98*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
99*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
100*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__neon_mla8_ld64(benchmark::State & state,models::ExecutionPlanFactory model)101*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__neon_mla8_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
102*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
103*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
104*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
105*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
106*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__neon_mla8_ld128(benchmark::State & state,models::ExecutionPlanFactory model)107*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__neon_mla8_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
108*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
109*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
110*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
111*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
112*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)113*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
114*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
115*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
116*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
117*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
118*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)119*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
120*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
121*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
122*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
123*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
124*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up24x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)125*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up24x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
126*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
127*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
128*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
129*4bdc9457SAndroid Build Coastguard Worker       24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
130*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up32x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)131*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up32x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
132*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
133*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
134*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_rndnu_neon_params,
135*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
136*4bdc9457SAndroid Build Coastguard Worker   }
137*4bdc9457SAndroid Build Coastguard Worker 
138*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__neon_mul8_ld64);
139*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mul8_ld64);
140*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mul8_ld128);
141*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__neon_mla8_ld64);
142*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mla8_ld64);
143*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mla8_ld128);
144*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__neon_mul16);
145*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mul16);
146*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up24x9__neon_mul16);
147*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__neon_mul16);
148*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
149*4bdc9457SAndroid Build Coastguard Worker 
150*4bdc9457SAndroid Build Coastguard Worker 
151*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
qs8_dwconv_up16x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)152*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
153*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
154*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
155*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx512_params,
156*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
157*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up32x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)158*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up32x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
159*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
160*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
161*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx512_params,
162*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
163*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__avx2_mul16_vpmovsx(benchmark::State & state,models::ExecutionPlanFactory model)164*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__avx2_mul16_vpmovsx(benchmark::State& state, models::ExecutionPlanFactory model) {
165*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
166*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
167*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
168*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
169*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up32x9__avx2_mul16_vpmovsx(benchmark::State & state,models::ExecutionPlanFactory model)170*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up32x9__avx2_mul16_vpmovsx(benchmark::State& state, models::ExecutionPlanFactory model) {
171*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
172*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
173*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
174*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
175*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__avx2_mul16_vpunpck(benchmark::State & state,models::ExecutionPlanFactory model)176*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__avx2_mul16_vpunpck(benchmark::State& state, models::ExecutionPlanFactory model) {
177*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
178*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
179*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
180*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
181*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up32x9__avx2_mul16_vpunpck(benchmark::State & state,models::ExecutionPlanFactory model)182*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up32x9__avx2_mul16_vpunpck(benchmark::State& state, models::ExecutionPlanFactory model) {
183*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
184*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
185*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
186*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
187*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__avx2_mul16_add16_vpunpck(benchmark::State & state,models::ExecutionPlanFactory model)188*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__avx2_mul16_add16_vpunpck(benchmark::State& state, models::ExecutionPlanFactory model) {
189*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
190*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
191*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
192*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
193*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up32x9__avx2_mul16_add16_vpunpck(benchmark::State & state,models::ExecutionPlanFactory model)194*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up32x9__avx2_mul16_add16_vpunpck(benchmark::State& state, models::ExecutionPlanFactory model) {
195*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
196*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
197*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
198*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
199*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)200*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
201*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
202*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
203*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
204*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
205*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)206*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
207*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
208*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
209*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
210*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
211*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up32x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)212*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up32x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
213*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
214*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
215*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_avx2_params,
216*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
217*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__xop_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)218*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__xop_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
219*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
220*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
221*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
222*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckXOP);
223*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__xop_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)224*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__xop_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
225*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
226*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
227*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
228*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckXOP);
229*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__xop_mul32(benchmark::State & state,models::ExecutionPlanFactory model)230*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__xop_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
231*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
232*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
233*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
234*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckXOP);
235*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__xop_mul32(benchmark::State & state,models::ExecutionPlanFactory model)236*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__xop_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
237*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
238*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
239*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
240*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckXOP);
241*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)242*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
243*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
244*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
245*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
246*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
247*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)248*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
249*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
250*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
251*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
252*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
253*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__avx_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)254*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__avx_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
255*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
256*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
257*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
258*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
259*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__avx_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)260*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__avx_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
261*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
262*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
263*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
264*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
265*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)266*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
267*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
268*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
269*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
270*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
271*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)272*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
273*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
274*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
275*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
276*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
277*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)278*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
279*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
280*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
281*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
282*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
283*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)284*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
285*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
286*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
287*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
288*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
289*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__sse41_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)290*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__sse41_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
291*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
292*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
293*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
294*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
295*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__sse41_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)296*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__sse41_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
297*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
298*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
299*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
300*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
301*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)302*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
303*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
304*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
305*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
306*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
307*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)308*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
309*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
310*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
311*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse4_params,
312*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
313*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)314*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
315*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
316*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
317*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse2_params,
318*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */);
319*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)320*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
321*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
322*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
323*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse2_params,
324*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */);
325*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up8x9__sse2_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)326*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__sse2_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
327*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
328*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
329*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse2_params,
330*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */);
331*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__sse2_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)332*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__sse2_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
333*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
334*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
335*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_sse2_params,
336*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */);
337*4bdc9457SAndroid Build Coastguard Worker   }
338*4bdc9457SAndroid Build Coastguard Worker 
// Registration of the x86 / x86-64 wrappers, grouped by instruction set.
// NOTE(review): BENCHMARK_QS8_END2END is not defined in the visible part of
// this file; presumably it comes from bench/end2end.h and registers the given
// wrapper against the QS8 end-to-end models -- confirm.

// AVX512SKX wrappers.
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx512skx_mul32);
BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx512skx_mul32);

// AVX2 wrappers.
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx2_mul16_vpmovsx);
BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx2_mul16_vpmovsx);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx2_mul16_vpunpck);
BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx2_mul16_vpunpck);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx2_mul16_add16_vpunpck);
BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx2_mul16_add16_vpunpck);
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__avx2_mul32);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx2_mul32);
BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx2_mul32);

// XOP wrappers.
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__xop_mul16_add16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__xop_mul16_add16);
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__xop_mul32);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__xop_mul32);

// AVX wrappers.
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__avx_mul16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx_mul16);
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__avx_mul16_add16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx_mul16_add16);
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__avx_mul32);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx_mul32);

// SSE4.1 wrappers.
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse41_mul16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse41_mul16);
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse41_mul16_add16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse41_mul16_add16);
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse41_mul32);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse41_mul32);

// SSE2 wrappers.
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse2_mul16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse2_mul16);
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse2_mul16_add16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse2_mul16_add16);
375*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
376*4bdc9457SAndroid Build Coastguard Worker 
377*4bdc9457SAndroid Build Coastguard Worker 
378*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qs8_dwconv_up8x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)379*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
380*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
381*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
382*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
383*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */);
384*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)385*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
386*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
387*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
388*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
389*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */);
390*4bdc9457SAndroid Build Coastguard Worker   }
391*4bdc9457SAndroid Build Coastguard Worker 
qs8_dwconv_up8x9__wasmsimd_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)392*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up8x9__wasmsimd_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
393*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
394*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
395*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
396*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */);
397*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up16x9__wasmsimd_mul16_add16(benchmark::State & state,models::ExecutionPlanFactory model)398*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up16x9__wasmsimd_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
399*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
400*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
401*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
402*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */);
403*4bdc9457SAndroid Build Coastguard Worker   }
404*4bdc9457SAndroid Build Coastguard Worker 
// Registration of the WAsm SIMD wrappers defined in this #if section.
// NOTE(review): BENCHMARK_QS8_END2END is defined outside the visible part of
// this file (presumably bench/end2end.h) -- confirm what it expands to.

// MUL16 variants.
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__wasmsimd_mul16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__wasmsimd_mul16);

// MUL16_ADD16 variants.
BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__wasmsimd_mul16_add16);
BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__wasmsimd_mul16_add16);
410*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
411*4bdc9457SAndroid Build Coastguard Worker 
412*4bdc9457SAndroid Build Coastguard Worker 
413*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qs8_dwconv_up1x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)414*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up1x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
415*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
416*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
417*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
418*4bdc9457SAndroid Build Coastguard Worker       1 /* channel tile */, 9 /* primary tile */);
419*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up2x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)420*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up2x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
421*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
422*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
423*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
424*4bdc9457SAndroid Build Coastguard Worker       2 /* channel tile */, 9 /* primary tile */);
425*4bdc9457SAndroid Build Coastguard Worker   }
qs8_dwconv_up4x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)426*4bdc9457SAndroid Build Coastguard Worker   static void qs8_dwconv_up4x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
427*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
428*4bdc9457SAndroid Build Coastguard Worker       xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
429*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
430*4bdc9457SAndroid Build Coastguard Worker       4 /* channel tile */, 9 /* primary tile */);
431*4bdc9457SAndroid Build Coastguard Worker   }
432*4bdc9457SAndroid Build Coastguard Worker 
// Registration of the WAsm FMAGIC wrappers defined in this #if section
// (channel tiles 1, 2 and 4).
// NOTE(review): BENCHMARK_QS8_END2END is defined outside the visible part of
// this file (presumably bench/end2end.h) -- confirm what it expands to.
BENCHMARK_QS8_END2END(qs8_dwconv_up1x9__wasm_fmagic);
BENCHMARK_QS8_END2END(qs8_dwconv_up2x9__wasm_fmagic);
BENCHMARK_QS8_END2END(qs8_dwconv_up4x9__wasm_fmagic);
436*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
437*4bdc9457SAndroid Build Coastguard Worker 
438*4bdc9457SAndroid Build Coastguard Worker 
qs8_dwconv_up1x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)439*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up1x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
440*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
441*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
442*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
443*4bdc9457SAndroid Build Coastguard Worker     1 /* channel tile */, 9 /* primary tile */);
444*4bdc9457SAndroid Build Coastguard Worker }
qs8_dwconv_up2x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)445*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up2x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
446*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
447*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
448*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
449*4bdc9457SAndroid Build Coastguard Worker     2 /* channel tile */, 9 /* primary tile */);
450*4bdc9457SAndroid Build Coastguard Worker }
qs8_dwconv_up4x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)451*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up4x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
452*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
453*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
454*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
455*4bdc9457SAndroid Build Coastguard Worker     4 /* channel tile */, 9 /* primary tile */);
456*4bdc9457SAndroid Build Coastguard Worker }
457*4bdc9457SAndroid Build Coastguard Worker 
qs8_dwconv_up1x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)458*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up1x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
459*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
460*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
461*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
462*4bdc9457SAndroid Build Coastguard Worker     1 /* channel tile */, 9 /* primary tile */);
463*4bdc9457SAndroid Build Coastguard Worker }
qs8_dwconv_up2x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)464*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up2x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
465*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
466*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
467*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
468*4bdc9457SAndroid Build Coastguard Worker     2 /* channel tile */, 9 /* primary tile */);
469*4bdc9457SAndroid Build Coastguard Worker }
qs8_dwconv_up4x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)470*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up4x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
471*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
472*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
473*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
474*4bdc9457SAndroid Build Coastguard Worker     4 /* channel tile */, 9 /* primary tile */);
475*4bdc9457SAndroid Build Coastguard Worker }
476*4bdc9457SAndroid Build Coastguard Worker 
qs8_dwconv_up1x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)477*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up1x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
478*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
479*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
480*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
481*4bdc9457SAndroid Build Coastguard Worker     1 /* channel tile */, 9 /* primary tile */);
482*4bdc9457SAndroid Build Coastguard Worker }
qs8_dwconv_up2x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)483*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up2x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
484*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
485*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
486*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
487*4bdc9457SAndroid Build Coastguard Worker     2 /* channel tile */, 9 /* primary tile */);
488*4bdc9457SAndroid Build Coastguard Worker }
qs8_dwconv_up4x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)489*4bdc9457SAndroid Build Coastguard Worker static void qs8_dwconv_up4x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
490*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
491*4bdc9457SAndroid Build Coastguard Worker     xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
492*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
493*4bdc9457SAndroid Build Coastguard Worker     4 /* channel tile */, 9 /* primary tile */);
494*4bdc9457SAndroid Build Coastguard Worker }
495*4bdc9457SAndroid Build Coastguard Worker 
// Registration of the portable scalar wrappers; these sit outside any
// architecture #if, so they are registered on every target.
// NOTE(review): BENCHMARK_QS8_END2END is defined outside the visible part of
// this file (presumably bench/end2end.h) -- confirm what it expands to.

// FMAGIC variants.
BENCHMARK_QS8_END2END(qs8_dwconv_up1x9__scalar_fmagic);
BENCHMARK_QS8_END2END(qs8_dwconv_up2x9__scalar_fmagic);
BENCHMARK_QS8_END2END(qs8_dwconv_up4x9__scalar_fmagic);

// IMAGIC variants.
BENCHMARK_QS8_END2END(qs8_dwconv_up1x9__scalar_imagic);
BENCHMARK_QS8_END2END(qs8_dwconv_up2x9__scalar_imagic);
BENCHMARK_QS8_END2END(qs8_dwconv_up4x9__scalar_imagic);

// LRINTF variants.
BENCHMARK_QS8_END2END(qs8_dwconv_up1x9__scalar_lrintf);
BENCHMARK_QS8_END2END(qs8_dwconv_up2x9__scalar_lrintf);
BENCHMARK_QS8_END2END(qs8_dwconv_up4x9__scalar_lrintf);
507*4bdc9457SAndroid Build Coastguard Worker 
508*4bdc9457SAndroid Build Coastguard Worker 
// Emit Google Benchmark's main() via BENCHMARK_MAIN() unless the build defines
// XNNPACK_BENCHMARK_NO_MAIN.
// NOTE(review): presumably that macro is set when an entry point is supplied
// elsewhere -- confirm against the build files.
#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
512