// xref: /aosp_15_r20/external/XNNPACK/test/argmaxpool-microkernel-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/aligned-allocator.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
21 
22 
23 class ArgMaxPoolMicrokernelTester {
24  public:
25   enum class Variant {
26     Native,
27     Scalar,
28   };
29 
output_pixels(size_t output_pixels)30   inline ArgMaxPoolMicrokernelTester& output_pixels(size_t output_pixels) {
31     assert(output_pixels != 0);
32     this->output_pixels_ = output_pixels;
33     return *this;
34   }
35 
output_pixels()36   inline size_t output_pixels() const {
37     return this->output_pixels_;
38   }
39 
step(size_t step)40   inline ArgMaxPoolMicrokernelTester& step(size_t step) {
41     assert(step != 0);
42     this->step_ = step;
43     return *this;
44   }
45 
step()46   inline size_t step() const {
47     return this->step_;
48   }
49 
input_offset(size_t input_offset)50   inline ArgMaxPoolMicrokernelTester& input_offset(size_t input_offset) {
51     assert(input_offset != 0);
52     this->input_offset_ = input_offset;
53     return *this;
54   }
55 
input_offset()56   inline size_t input_offset() const {
57     return this->input_offset_;
58   }
59 
pooling_elements(size_t pooling_elements)60   inline ArgMaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) {
61     assert(pooling_elements != 0);
62     this->pooling_elements_ = pooling_elements;
63     return *this;
64   }
65 
pooling_elements()66   inline size_t pooling_elements() const {
67     return this->pooling_elements_;
68   }
69 
packed_pooling_elements()70   inline size_t packed_pooling_elements() const {
71     if (pooling_elements() <= primary_pooling_tile()) {
72       return primary_pooling_tile();
73     } else {
74       return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ? pooling_elements() : ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile();
75     }
76   }
77 
pooling_tile(size_t primary_tile)78   inline ArgMaxPoolMicrokernelTester& pooling_tile(size_t primary_tile) {
79     assert(primary_tile != 0);
80     this->primary_pooling_tile_ = primary_tile;
81     this->incremental_pooling_tile_ = 0;
82     return *this;
83   }
84 
pooling_tile(size_t primary_tile,size_t incremental_tile)85   inline ArgMaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) {
86     assert(primary_tile != 0);
87     this->primary_pooling_tile_ = primary_tile;
88     this->incremental_pooling_tile_ = incremental_tile;
89     return *this;
90   }
91 
primary_pooling_tile(size_t primary_pooling_tile)92   inline ArgMaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) {
93     assert(primary_pooling_tile != 0);
94     this->primary_pooling_tile_ = primary_pooling_tile;
95     return *this;
96   }
97 
primary_pooling_tile()98   inline size_t primary_pooling_tile() const {
99     return this->primary_pooling_tile_;
100   }
101 
incremental_pooling_tile(size_t incremental_pooling_tile)102   inline ArgMaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) {
103     assert(incremental_pooling_tile != 0);
104     this->incremental_pooling_tile_ = incremental_pooling_tile;
105     return *this;
106   }
107 
incremental_pooling_tile()108   inline size_t incremental_pooling_tile() const {
109     return this->incremental_pooling_tile_;
110   }
111 
channels(size_t channels)112   inline ArgMaxPoolMicrokernelTester& channels(size_t channels) {
113     assert(channels != 0);
114     this->channels_ = channels;
115     return *this;
116   }
117 
channels()118   inline size_t channels() const {
119     return this->channels_;
120   }
121 
output_stride(size_t output_stride)122   inline ArgMaxPoolMicrokernelTester& output_stride(size_t output_stride) {
123     assert(output_stride != 0);
124     this->output_stride_ = output_stride;
125     return *this;
126   }
127 
output_stride()128   inline size_t output_stride() const {
129     if (this->output_stride_ == 0) {
130       return channels();
131     } else {
132       assert(this->output_stride_ >= channels());
133       return this->output_stride_;
134     }
135   }
136 
iterations(size_t iterations)137   inline ArgMaxPoolMicrokernelTester& iterations(size_t iterations) {
138     this->iterations_ = iterations;
139     return *this;
140   }
141 
iterations()142   inline size_t iterations() const {
143     return this->iterations_;
144   }
145 
146   void Test(xnn_f32_argmaxpool_unipass_ukernel_function argmaxpool, Variant variant = Variant::Native) const {
147     std::random_device random_device;
148     auto rng = std::mt19937(random_device());
149     std::uniform_real_distribution<float> f32dist;
150 
151     std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
152     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
153       ((output_pixels() - 1) * step() + pooling_elements()) * channels());
154     std::vector<float> output((output_pixels() - 1) * output_stride() + channels());
155     std::vector<uint32_t> index(output_pixels() * channels());
156     std::vector<float> output_ref(output_pixels() * channels());
157     std::vector<uint32_t> index_ref(output_pixels() * channels());
158     for (size_t iteration = 0; iteration < iterations(); iteration++) {
159       std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
160       std::fill(output.begin(), output.end(), nanf(""));
161 
162       for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
163         indirect_input[i] = input.data() + i * channels() - input_offset();
164       }
165       std::shuffle(indirect_input.begin(),
166         indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
167 
168       // Compute reference results, without clamping.
169       for (size_t x = 0; x < output_pixels(); x++) {
170         for (size_t c = 0; c < channels(); c++) {
171           float max_value = indirect_input[x * step()][c + input_offset()];
172           uint32_t max_index = 0;
173           for (size_t p = 0; p < pooling_elements(); p++) {
174             const float value = indirect_input[x * step() + p][c + input_offset()];
175             if (value > max_value) {
176               max_value = value;
177               max_index = p;
178             }
179           }
180           output_ref[x * channels() + c] = max_value;
181           index_ref[x * channels() + c] = max_index;
182         }
183       }
184 
185       // Call optimized micro-kernel.
186       argmaxpool(output_pixels(), pooling_elements(), channels(),
187         indirect_input.data(), input_offset() * sizeof(float), output.data(), index.data(),
188         step() * sizeof(void*),
189         (output_stride() - channels()) * sizeof(float));
190 
191       // Verify results.
192       for (size_t x = 0; x < output_pixels(); x++) {
193         for (size_t c = 0; c < channels(); c++) {
194           ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c])
195             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
196             << ", pooling elements = " << pooling_elements() << ", step = " << step()
197             << ", input offset = " << input_offset();
198           ASSERT_EQ(
199               indirect_input[x * step() + index_ref[x * channels() + c]][c + input_offset()],
200               indirect_input[x * step() + index[x * channels() + c]][c + input_offset()])
201             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
202             << ", pooling elements = " << pooling_elements() << ", step = " << step()
203             << ", input offset = " << input_offset();
204           ASSERT_EQ(index_ref[x * channels() + c], index[x * channels() + c])
205             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
206             << ", pooling elements = " << pooling_elements() << ", step = " << step()
207             << ", input offset = " << input_offset();
208         }
209       }
210     }
211   }
212 
213   void Test(xnn_f32_argmaxpool_multipass_ukernel_function argmaxpool, Variant variant = Variant::Native) const {
214     std::random_device random_device;
215     auto rng = std::mt19937(random_device());
216     std::uniform_real_distribution<float> f32dist;
217 
218     std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
219     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
220       ((output_pixels() - 1) * step() + pooling_elements()) * channels());
221     std::vector<float> output((output_pixels() - 1) * output_stride() + channels());
222     std::vector<uint32_t> index(output_pixels() * channels());
223     std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> index_buffer(
224       channels() + XNN_EXTRA_BYTES / sizeof(uint32_t));
225     std::vector<float, AlignedAllocator<float, 64>> output_buffer(
226       channels() + XNN_EXTRA_BYTES / sizeof(float));
227     std::vector<float> output_ref(output_pixels() * channels());
228     std::vector<uint32_t> index_ref(output_pixels() * channels());
229     for (size_t iteration = 0; iteration < iterations(); iteration++) {
230       std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
231       std::fill(output.begin(), output.end(), nanf(""));
232 
233       for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
234         indirect_input[i] = input.data() + i * channels() - input_offset();
235       }
236       std::shuffle(indirect_input.begin(),
237         indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
238 
239       // Compute reference results, without clamping.
240       for (size_t x = 0; x < output_pixels(); x++) {
241         for (size_t c = 0; c < channels(); c++) {
242           float max_value = indirect_input[x * step()][c + input_offset()];
243           uint32_t max_index = 0;
244           for (size_t p = 0; p < pooling_elements(); p++) {
245             const float value = indirect_input[x * step() + p][c + input_offset()];
246             if (value > max_value) {
247               max_value = value;
248               max_index = p;
249             }
250           }
251           output_ref[x * channels() + c] = max_value;
252           index_ref[x * channels() + c] = max_index;
253         }
254       }
255 
256       // Call optimized micro-kernel.
257       argmaxpool(output_pixels(), pooling_elements(), channels(),
258         indirect_input.data(), input_offset() * sizeof(float),
259         output_buffer.data(), index_buffer.data(),
260         output.data(), index.data(),
261         (step() - (packed_pooling_elements() - incremental_pooling_tile())) * sizeof(void*),
262         (output_stride() - channels()) * sizeof(float));
263 
264       // Verify results.
265       for (size_t x = 0; x < output_pixels(); x++) {
266         for (size_t c = 0; c < channels(); c++) {
267           ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c])
268             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
269             << ", pooling elements = " << pooling_elements() << ", step = " << step()
270             << ", input offset = " << input_offset();
271           ASSERT_EQ(
272               indirect_input[x * step() + index_ref[x * channels() + c]][c + input_offset()],
273               indirect_input[x * step() + index[x * channels() + c]][c + input_offset()])
274             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
275             << ", pooling elements = " << pooling_elements() << ", step = " << step()
276             << ", input offset = " << input_offset();
277           ASSERT_EQ(index_ref[x * channels() + c], index[x * channels() + c])
278             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
279             << ", pooling elements = " << pooling_elements() << ", step = " << step()
280             << ", input offset = " << input_offset();
281         }
282       }
283     }
284   }
285 
286  private:
287   size_t output_pixels_{1};
288   size_t pooling_elements_{1};
289   size_t channels_{1};
290   size_t input_offset_{0};
291   size_t step_{1};
292   size_t primary_pooling_tile_{1};
293   size_t incremental_pooling_tile_{1};
294   size_t output_stride_{0};
295   size_t iterations_{3};
296 };
297