1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <algorithm>
10 #include <cfloat>
11 #include <cmath>
12 #include <functional>
13 #include <limits>
14 #include <random>
15 #include <vector>
16
17 #include <xnnpack.h>
18
19 #include <benchmark/benchmark.h>
20 #include <fp16.h>
21 #include "bench/utils.h"
22
23 #ifndef XNN_NO_QU8_OPERATORS
global_average_pooling_qu8(benchmark::State & state)24 static void global_average_pooling_qu8(benchmark::State& state) {
25 const size_t batch_size = state.range(0);
26 const size_t input_height = state.range(1);
27 const size_t input_width = state.range(2);
28 const size_t channels = state.range(3);
29
30 std::random_device random_device;
31 auto rng = std::mt19937(random_device());
32 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
33
34 std::vector<uint8_t> input(batch_size * input_height * input_width * channels);
35 std::generate(input.begin(), input.end(), std::ref(u8rng));
36 std::vector<uint8_t> output(batch_size * channels);
37
38 xnn_status status = xnn_initialize(nullptr /* allocator */);
39 if (status != xnn_status_success) {
40 state.SkipWithError("failed to initialize XNNPACK");
41 }
42
43 xnn_operator_t global_pooling_op = nullptr;
44 status = xnn_create_global_average_pooling_nwc_qu8(
45 channels, channels /* input stride */, channels /* output stride */,
46 127 /* input zero point */, 0.75f /* input scale */,
47 127 /* output zero point */, 1.25f /* output scale */,
48 0, 255,
49 0 /* flags */, &global_pooling_op);
50 if (status != xnn_status_success) {
51 state.SkipWithError("failed to create Global Average Pooling operator");
52 }
53
54 status = xnn_setup_global_average_pooling_nwc_qu8(
55 global_pooling_op,
56 batch_size, input_height * input_width,
57 input.data(), output.data(),
58 nullptr /* thread pool */);
59 if (status != xnn_status_success) {
60 state.SkipWithError("failed to setup Global Average Pooling operator");
61 }
62
63 for (auto _ : state) {
64 xnn_run_operator(global_pooling_op, nullptr /* thread pool */);
65 }
66
67 status = xnn_delete_operator(global_pooling_op);
68 if (status != xnn_status_success) {
69 state.SkipWithError("failed to delete Global Average Pooling operator");
70 }
71 global_pooling_op = nullptr;
72
73 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
74 if (cpu_frequency != 0) {
75 state.counters["cpufreq"] = cpu_frequency;
76 }
77
78 state.counters["bytes"] = benchmark::Counter(
79 uint64_t(state.iterations()) *
80 batch_size * (input_height * input_width + 1) * channels * sizeof(uint8_t),
81 benchmark::Counter::kIsRate);
82 }
83 #endif // XNN_NO_QU8_OPERATORS
84
85 #ifndef XNN_NO_QS8_OPERATORS
global_average_pooling_qs8(benchmark::State & state)86 static void global_average_pooling_qs8(benchmark::State& state) {
87 const size_t batch_size = state.range(0);
88 const size_t input_height = state.range(1);
89 const size_t input_width = state.range(2);
90 const size_t channels = state.range(3);
91
92 std::random_device random_device;
93 auto rng = std::mt19937(random_device());
94 auto i8rng = std::bind(
95 std::uniform_int_distribution<uint32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));
96
97 std::vector<int8_t> input(batch_size * input_height * input_width * channels);
98 std::generate(input.begin(), input.end(), std::ref(i8rng));
99 std::vector<int8_t> output(batch_size * channels);
100
101 xnn_status status = xnn_initialize(nullptr /* allocator */);
102 if (status != xnn_status_success) {
103 state.SkipWithError("failed to initialize XNNPACK");
104 }
105
106 xnn_operator_t global_pooling_op = nullptr;
107 status = xnn_create_global_average_pooling_nwc_qs8(
108 channels, channels /* input stride */, channels /* output stride */,
109 -1 /* input zero point */, 0.75f /* input scale */,
110 -1 /* output zero point */, 1.25f /* output scale */,
111 -128, 127,
112 0 /* flags */, &global_pooling_op);
113 if (status != xnn_status_success) {
114 state.SkipWithError("failed to create Global Average Pooling operator");
115 }
116
117 status = xnn_setup_global_average_pooling_nwc_qs8(
118 global_pooling_op,
119 batch_size, input_height * input_width,
120 input.data(), output.data(),
121 nullptr /* thread pool */);
122 if (status != xnn_status_success) {
123 state.SkipWithError("failed to setup Global Average Pooling operator");
124 }
125
126 for (auto _ : state) {
127 xnn_run_operator(global_pooling_op, nullptr /* thread pool */);
128 }
129
130 status = xnn_delete_operator(global_pooling_op);
131 if (status != xnn_status_success) {
132 state.SkipWithError("failed to delete Global Average Pooling operator");
133 }
134 global_pooling_op = nullptr;
135
136 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
137 if (cpu_frequency != 0) {
138 state.counters["cpufreq"] = cpu_frequency;
139 }
140
141 state.counters["bytes"] = benchmark::Counter(
142 uint64_t(state.iterations()) *
143 batch_size * (input_height * input_width + 1) * channels * sizeof(int8_t),
144 benchmark::Counter::kIsRate);
145 }
146 #endif // XNN_NO_QS8_OPERATORS
147
148 #ifndef XNN_NO_F16_OPERATORS
global_average_pooling_f16(benchmark::State & state)149 static void global_average_pooling_f16(benchmark::State& state) {
150 if (!benchmark::utils::CheckNEONFP16ARITH(state)) {
151 return;
152 }
153 const size_t batch_size = state.range(0);
154 const size_t input_height = state.range(1);
155 const size_t input_width = state.range(2);
156 const size_t channels = state.range(3);
157
158 std::random_device random_device;
159 auto rng = std::mt19937(random_device());
160 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));
161 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
162
163 std::vector<uint16_t> input(batch_size * input_height * input_width * channels);
164 std::generate(input.begin(), input.end(), std::ref(f16rng));
165 std::vector<uint16_t> output(batch_size * channels);
166
167 xnn_status status = xnn_initialize(nullptr /* allocator */);
168 if (status != xnn_status_success) {
169 state.SkipWithError("failed to initialize XNNPACK");
170 }
171
172 xnn_operator_t global_pooling_op = nullptr;
173 status = xnn_create_global_average_pooling_nwc_f16(
174 channels, channels /* input stride */, channels /* output stride */,
175 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
176 0 /* flags */, &global_pooling_op);
177 if (status != xnn_status_success) {
178 state.SkipWithError("failed to create Global Average Pooling operator");
179 }
180
181 status = xnn_setup_global_average_pooling_nwc_f16(
182 global_pooling_op,
183 batch_size, input_height * input_width,
184 input.data(), output.data(),
185 nullptr /* thread pool */);
186 if (status != xnn_status_success) {
187 state.SkipWithError("failed to setup Global Average Pooling operator");
188 }
189
190 for (auto _ : state) {
191 xnn_run_operator(global_pooling_op, nullptr /* thread pool */);
192 }
193
194 status = xnn_delete_operator(global_pooling_op);
195 if (status != xnn_status_success) {
196 state.SkipWithError("failed to delete Global Average Pooling operator");
197 }
198 global_pooling_op = nullptr;
199
200 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
201 if (cpu_frequency != 0) {
202 state.counters["cpufreq"] = cpu_frequency;
203 }
204
205 state.counters["bytes"] = benchmark::Counter(
206 uint64_t(state.iterations()) *
207 batch_size * (input_height * input_width + 1) * channels * sizeof(uint16_t),
208 benchmark::Counter::kIsRate);
209 }
210 #endif // XNN_NO_F16_OPERATORS
211
global_average_pooling_f32(benchmark::State & state)212 static void global_average_pooling_f32(benchmark::State& state) {
213 const size_t batch_size = state.range(0);
214 const size_t input_height = state.range(1);
215 const size_t input_width = state.range(2);
216 const size_t channels = state.range(3);
217
218 std::random_device random_device;
219 auto rng = std::mt19937(random_device());
220 auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
221
222 std::vector<float> input(batch_size * input_height * input_width * channels);
223 std::generate(input.begin(), input.end(), std::ref(f32rng));
224 std::vector<float> output(batch_size * channels);
225
226 xnn_status status = xnn_initialize(nullptr /* allocator */);
227 if (status != xnn_status_success) {
228 state.SkipWithError("failed to initialize XNNPACK");
229 }
230
231 xnn_operator_t global_pooling_op = nullptr;
232 status = xnn_create_global_average_pooling_nwc_f32(
233 channels, channels /* input stride */, channels /* output stride */,
234 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
235 0 /* flags */, &global_pooling_op);
236 if (status != xnn_status_success) {
237 state.SkipWithError("failed to create Global Average Pooling operator");
238 }
239
240 status = xnn_setup_global_average_pooling_nwc_f32(
241 global_pooling_op,
242 batch_size, input_height * input_width,
243 input.data(), output.data(),
244 nullptr /* thread pool */);
245 if (status != xnn_status_success) {
246 state.SkipWithError("failed to setup Global Average Pooling operator");
247 }
248
249 for (auto _ : state) {
250 xnn_run_operator(global_pooling_op, nullptr /* thread pool */);
251 }
252
253 status = xnn_delete_operator(global_pooling_op);
254 if (status != xnn_status_success) {
255 state.SkipWithError("failed to delete Global Average Pooling operator");
256 }
257 global_pooling_op = nullptr;
258
259 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
260 if (cpu_frequency != 0) {
261 state.counters["cpufreq"] = cpu_frequency;
262 }
263
264 state.counters["bytes"] = benchmark::Counter(
265 uint64_t(state.iterations()) *
266 batch_size * (input_height * input_width + 1) * channels * sizeof(float),
267 benchmark::Counter::kIsRate);
268 }
269
ImageNetArguments(benchmark::internal::Benchmark * b)270 static void ImageNetArguments(benchmark::internal::Benchmark* b) {
271 b->ArgNames({"N", "H", "W", "C"});
272
273 /* N IH IW C */
274 b->Args({1, 7, 7, 1000});
275 b->Args({1, 13, 13, 1000});
276 }
277
278 #ifndef XNN_NO_QU8_OPERATORS
279 BENCHMARK(global_average_pooling_qu8)->Apply(ImageNetArguments)->UseRealTime();
280 #endif // XNN_NO_QU8_OPERATORS
281 #ifndef XNN_NO_QS8_OPERATORS
282 BENCHMARK(global_average_pooling_qs8)->Apply(ImageNetArguments)->UseRealTime();
283 #endif // XNN_NO_QS8_OPERATORS
284 #ifndef XNN_NO_F16_OPERATORS
285 BENCHMARK(global_average_pooling_f16)->Apply(ImageNetArguments)->UseRealTime();
286 #endif // XNN_NO_F16_OPERATORS
287 BENCHMARK(global_average_pooling_f32)->Apply(ImageNetArguments)->UseRealTime();
288
289 #ifndef XNNPACK_BENCHMARK_NO_MAIN
290 BENCHMARK_MAIN();
291 #endif
292