xref: /aosp_15_r20/external/XNNPACK/bench/global-average-pooling.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <algorithm>
10 #include <cfloat>
11 #include <cmath>
12 #include <functional>
13 #include <limits>
14 #include <random>
15 #include <vector>
16 
17 #include <xnnpack.h>
18 
19 #include <benchmark/benchmark.h>
20 #include <fp16.h>
21 #include "bench/utils.h"
22 
23 #ifndef XNN_NO_QU8_OPERATORS
global_average_pooling_qu8(benchmark::State & state)24 static void global_average_pooling_qu8(benchmark::State& state) {
25   const size_t batch_size = state.range(0);
26   const size_t input_height = state.range(1);
27   const size_t input_width = state.range(2);
28   const size_t channels = state.range(3);
29 
30   std::random_device random_device;
31   auto rng = std::mt19937(random_device());
32   auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
33 
34   std::vector<uint8_t> input(batch_size * input_height * input_width * channels);
35   std::generate(input.begin(), input.end(), std::ref(u8rng));
36   std::vector<uint8_t> output(batch_size * channels);
37 
38   xnn_status status = xnn_initialize(nullptr /* allocator */);
39   if (status != xnn_status_success) {
40     state.SkipWithError("failed to initialize XNNPACK");
41   }
42 
43   xnn_operator_t global_pooling_op = nullptr;
44   status = xnn_create_global_average_pooling_nwc_qu8(
45     channels, channels /* input stride */, channels /* output stride */,
46     127 /* input zero point */, 0.75f /* input scale */,
47     127 /* output zero point */, 1.25f /* output scale */,
48     0, 255,
49     0 /* flags */, &global_pooling_op);
50   if (status != xnn_status_success) {
51     state.SkipWithError("failed to create Global Average Pooling operator");
52   }
53 
54   status = xnn_setup_global_average_pooling_nwc_qu8(
55     global_pooling_op,
56     batch_size, input_height * input_width,
57     input.data(), output.data(),
58     nullptr /* thread pool */);
59   if (status != xnn_status_success) {
60     state.SkipWithError("failed to setup Global Average Pooling operator");
61   }
62 
63   for (auto _ : state) {
64     xnn_run_operator(global_pooling_op, nullptr /* thread pool */);
65   }
66 
67   status = xnn_delete_operator(global_pooling_op);
68   if (status != xnn_status_success) {
69     state.SkipWithError("failed to delete Global Average Pooling operator");
70   }
71   global_pooling_op = nullptr;
72 
73   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
74   if (cpu_frequency != 0) {
75     state.counters["cpufreq"] = cpu_frequency;
76   }
77 
78   state.counters["bytes"] = benchmark::Counter(
79     uint64_t(state.iterations()) *
80       batch_size * (input_height * input_width + 1) * channels * sizeof(uint8_t),
81     benchmark::Counter::kIsRate);
82 }
83 #endif  // XNN_NO_QU8_OPERATORS
84 
85 #ifndef XNN_NO_QS8_OPERATORS
global_average_pooling_qs8(benchmark::State & state)86 static void global_average_pooling_qs8(benchmark::State& state) {
87   const size_t batch_size = state.range(0);
88   const size_t input_height = state.range(1);
89   const size_t input_width = state.range(2);
90   const size_t channels = state.range(3);
91 
92   std::random_device random_device;
93   auto rng = std::mt19937(random_device());
94   auto i8rng = std::bind(
95     std::uniform_int_distribution<uint32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));
96 
97   std::vector<int8_t> input(batch_size * input_height * input_width * channels);
98   std::generate(input.begin(), input.end(), std::ref(i8rng));
99   std::vector<int8_t> output(batch_size * channels);
100 
101   xnn_status status = xnn_initialize(nullptr /* allocator */);
102   if (status != xnn_status_success) {
103     state.SkipWithError("failed to initialize XNNPACK");
104   }
105 
106   xnn_operator_t global_pooling_op = nullptr;
107   status = xnn_create_global_average_pooling_nwc_qs8(
108     channels, channels /* input stride */, channels /* output stride */,
109     -1 /* input zero point */, 0.75f /* input scale */,
110     -1 /* output zero point */, 1.25f /* output scale */,
111     -128, 127,
112     0 /* flags */, &global_pooling_op);
113   if (status != xnn_status_success) {
114     state.SkipWithError("failed to create Global Average Pooling operator");
115   }
116 
117   status = xnn_setup_global_average_pooling_nwc_qs8(
118     global_pooling_op,
119     batch_size, input_height * input_width,
120     input.data(), output.data(),
121     nullptr /* thread pool */);
122   if (status != xnn_status_success) {
123     state.SkipWithError("failed to setup Global Average Pooling operator");
124   }
125 
126   for (auto _ : state) {
127     xnn_run_operator(global_pooling_op, nullptr /* thread pool */);
128   }
129 
130   status = xnn_delete_operator(global_pooling_op);
131   if (status != xnn_status_success) {
132     state.SkipWithError("failed to delete Global Average Pooling operator");
133   }
134   global_pooling_op = nullptr;
135 
136   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
137   if (cpu_frequency != 0) {
138     state.counters["cpufreq"] = cpu_frequency;
139   }
140 
141   state.counters["bytes"] = benchmark::Counter(
142     uint64_t(state.iterations()) *
143       batch_size * (input_height * input_width + 1) * channels * sizeof(int8_t),
144     benchmark::Counter::kIsRate);
145 }
146 #endif  // XNN_NO_QS8_OPERATORS
147 
148 #ifndef XNN_NO_F16_OPERATORS
global_average_pooling_f16(benchmark::State & state)149 static void global_average_pooling_f16(benchmark::State& state) {
150   if (!benchmark::utils::CheckNEONFP16ARITH(state)) {
151     return;
152   }
153   const size_t batch_size = state.range(0);
154   const size_t input_height = state.range(1);
155   const size_t input_width = state.range(2);
156   const size_t channels = state.range(3);
157 
158   std::random_device random_device;
159   auto rng = std::mt19937(random_device());
160   auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));
161   auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
162 
163   std::vector<uint16_t> input(batch_size * input_height * input_width * channels);
164   std::generate(input.begin(), input.end(), std::ref(f16rng));
165   std::vector<uint16_t> output(batch_size * channels);
166 
167   xnn_status status = xnn_initialize(nullptr /* allocator */);
168   if (status != xnn_status_success) {
169     state.SkipWithError("failed to initialize XNNPACK");
170   }
171 
172   xnn_operator_t global_pooling_op = nullptr;
173   status = xnn_create_global_average_pooling_nwc_f16(
174     channels, channels /* input stride */, channels /* output stride */,
175     -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
176     0 /* flags */, &global_pooling_op);
177   if (status != xnn_status_success) {
178     state.SkipWithError("failed to create Global Average Pooling operator");
179   }
180 
181   status = xnn_setup_global_average_pooling_nwc_f16(
182     global_pooling_op,
183     batch_size, input_height * input_width,
184     input.data(), output.data(),
185     nullptr /* thread pool */);
186   if (status != xnn_status_success) {
187     state.SkipWithError("failed to setup Global Average Pooling operator");
188   }
189 
190   for (auto _ : state) {
191     xnn_run_operator(global_pooling_op, nullptr /* thread pool */);
192   }
193 
194   status = xnn_delete_operator(global_pooling_op);
195   if (status != xnn_status_success) {
196     state.SkipWithError("failed to delete Global Average Pooling operator");
197   }
198   global_pooling_op = nullptr;
199 
200   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
201   if (cpu_frequency != 0) {
202     state.counters["cpufreq"] = cpu_frequency;
203   }
204 
205   state.counters["bytes"] = benchmark::Counter(
206     uint64_t(state.iterations()) *
207       batch_size * (input_height * input_width + 1) * channels * sizeof(uint16_t),
208     benchmark::Counter::kIsRate);
209 }
210 #endif  // XNN_NO_F16_OPERATORS
211 
global_average_pooling_f32(benchmark::State & state)212 static void global_average_pooling_f32(benchmark::State& state) {
213   const size_t batch_size = state.range(0);
214   const size_t input_height = state.range(1);
215   const size_t input_width = state.range(2);
216   const size_t channels = state.range(3);
217 
218   std::random_device random_device;
219   auto rng = std::mt19937(random_device());
220   auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
221 
222   std::vector<float> input(batch_size * input_height * input_width * channels);
223   std::generate(input.begin(), input.end(), std::ref(f32rng));
224   std::vector<float> output(batch_size * channels);
225 
226   xnn_status status = xnn_initialize(nullptr /* allocator */);
227   if (status != xnn_status_success) {
228     state.SkipWithError("failed to initialize XNNPACK");
229   }
230 
231   xnn_operator_t global_pooling_op = nullptr;
232   status = xnn_create_global_average_pooling_nwc_f32(
233     channels, channels /* input stride */, channels /* output stride */,
234     -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
235     0 /* flags */, &global_pooling_op);
236   if (status != xnn_status_success) {
237     state.SkipWithError("failed to create Global Average Pooling operator");
238   }
239 
240   status = xnn_setup_global_average_pooling_nwc_f32(
241     global_pooling_op,
242     batch_size, input_height * input_width,
243     input.data(), output.data(),
244     nullptr /* thread pool */);
245   if (status != xnn_status_success) {
246     state.SkipWithError("failed to setup Global Average Pooling operator");
247   }
248 
249   for (auto _ : state) {
250     xnn_run_operator(global_pooling_op, nullptr /* thread pool */);
251   }
252 
253   status = xnn_delete_operator(global_pooling_op);
254   if (status != xnn_status_success) {
255     state.SkipWithError("failed to delete Global Average Pooling operator");
256   }
257   global_pooling_op = nullptr;
258 
259   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
260   if (cpu_frequency != 0) {
261     state.counters["cpufreq"] = cpu_frequency;
262   }
263 
264   state.counters["bytes"] = benchmark::Counter(
265     uint64_t(state.iterations()) *
266       batch_size * (input_height * input_width + 1) * channels * sizeof(float),
267     benchmark::Counter::kIsRate);
268 }
269 
ImageNetArguments(benchmark::internal::Benchmark * b)270 static void ImageNetArguments(benchmark::internal::Benchmark* b) {
271   b->ArgNames({"N", "H", "W", "C"});
272 
273   /*       N  IH  IW    C */
274   b->Args({1,  7,  7, 1000});
275   b->Args({1, 13, 13, 1000});
276 }
277 
278 #ifndef XNN_NO_QU8_OPERATORS
279 BENCHMARK(global_average_pooling_qu8)->Apply(ImageNetArguments)->UseRealTime();
280 #endif  // XNN_NO_QU8_OPERATORS
281 #ifndef XNN_NO_QS8_OPERATORS
282 BENCHMARK(global_average_pooling_qs8)->Apply(ImageNetArguments)->UseRealTime();
283 #endif  // XNN_NO_QS8_OPERATORS
284 #ifndef XNN_NO_F16_OPERATORS
285 BENCHMARK(global_average_pooling_f16)->Apply(ImageNetArguments)->UseRealTime();
286 #endif  // XNN_NO_F16_OPERATORS
287 BENCHMARK(global_average_pooling_f32)->Apply(ImageNetArguments)->UseRealTime();
288 
289 #ifndef XNNPACK_BENCHMARK_NO_MAIN
290 BENCHMARK_MAIN();
291 #endif
292