1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <gtest/gtest.h> 9 10 #include <algorithm> 11 #include <cassert> 12 #include <cstddef> 13 #include <cstdlib> 14 #include <random> 15 #include <vector> 16 17 #include <xnnpack.h> 18 #include <xnnpack/aligned-allocator.h> 19 #include <xnnpack/microfnptr.h> 20 #include <xnnpack/microparams-init.h> 21 22 23 class ArgMaxPoolMicrokernelTester { 24 public: 25 enum class Variant { 26 Native, 27 Scalar, 28 }; 29 output_pixels(size_t output_pixels)30 inline ArgMaxPoolMicrokernelTester& output_pixels(size_t output_pixels) { 31 assert(output_pixels != 0); 32 this->output_pixels_ = output_pixels; 33 return *this; 34 } 35 output_pixels()36 inline size_t output_pixels() const { 37 return this->output_pixels_; 38 } 39 step(size_t step)40 inline ArgMaxPoolMicrokernelTester& step(size_t step) { 41 assert(step != 0); 42 this->step_ = step; 43 return *this; 44 } 45 step()46 inline size_t step() const { 47 return this->step_; 48 } 49 input_offset(size_t input_offset)50 inline ArgMaxPoolMicrokernelTester& input_offset(size_t input_offset) { 51 assert(input_offset != 0); 52 this->input_offset_ = input_offset; 53 return *this; 54 } 55 input_offset()56 inline size_t input_offset() const { 57 return this->input_offset_; 58 } 59 pooling_elements(size_t pooling_elements)60 inline ArgMaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) { 61 assert(pooling_elements != 0); 62 this->pooling_elements_ = pooling_elements; 63 return *this; 64 } 65 pooling_elements()66 inline size_t pooling_elements() const { 67 return this->pooling_elements_; 68 } 69 packed_pooling_elements()70 inline size_t packed_pooling_elements() const { 71 if (pooling_elements() <= primary_pooling_tile()) { 72 return primary_pooling_tile(); 73 } else { 74 return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ? pooling_elements() : ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile(); 75 } 76 } 77 pooling_tile(size_t primary_tile)78 inline ArgMaxPoolMicrokernelTester& pooling_tile(size_t primary_tile) { 79 assert(primary_tile != 0); 80 this->primary_pooling_tile_ = primary_tile; 81 this->incremental_pooling_tile_ = 0; 82 return *this; 83 } 84 pooling_tile(size_t primary_tile,size_t incremental_tile)85 inline ArgMaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) { 86 assert(primary_tile != 0); 87 this->primary_pooling_tile_ = primary_tile; 88 this->incremental_pooling_tile_ = incremental_tile; 89 return *this; 90 } 91 primary_pooling_tile(size_t primary_pooling_tile)92 inline ArgMaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) { 93 assert(primary_pooling_tile != 0); 94 this->primary_pooling_tile_ = primary_pooling_tile; 95 return *this; 96 } 97 primary_pooling_tile()98 inline size_t primary_pooling_tile() const { 99 return this->primary_pooling_tile_; 100 } 101 incremental_pooling_tile(size_t incremental_pooling_tile)102 inline ArgMaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) { 103 assert(incremental_pooling_tile != 0); 104 this->incremental_pooling_tile_ = incremental_pooling_tile; 105 return *this; 106 } 107 incremental_pooling_tile()108 inline size_t incremental_pooling_tile() const { 109 return this->incremental_pooling_tile_; 110 } 111 channels(size_t channels)112 inline ArgMaxPoolMicrokernelTester& channels(size_t channels) { 113 assert(channels != 0); 114 this->channels_ = channels; 115 return *this; 116 } 117 channels()118 inline size_t channels() const { 119 return this->channels_; 120 } 121 output_stride(size_t output_stride)122 inline ArgMaxPoolMicrokernelTester& output_stride(size_t output_stride) { 123 assert(output_stride != 0); 124 this->output_stride_ = output_stride; 125 return *this; 126 } 127 output_stride()128 inline size_t output_stride() const { 129 if (this->output_stride_ == 0) { 130 return channels(); 131 } else { 132 assert(this->output_stride_ >= channels()); 133 return this->output_stride_; 134 } 135 } 136 iterations(size_t iterations)137 inline ArgMaxPoolMicrokernelTester& iterations(size_t iterations) { 138 this->iterations_ = iterations; 139 return *this; 140 } 141 iterations()142 inline size_t iterations() const { 143 return this->iterations_; 144 } 145 146 void Test(xnn_f32_argmaxpool_unipass_ukernel_function argmaxpool, Variant variant = Variant::Native) const { 147 std::random_device random_device; 148 auto rng = std::mt19937(random_device()); 149 std::uniform_real_distribution<float> f32dist; 150 151 std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements()); 152 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 153 ((output_pixels() - 1) * step() + pooling_elements()) * channels()); 154 std::vector<float> output((output_pixels() - 1) * output_stride() + channels()); 155 std::vector<uint32_t> index(output_pixels() * channels()); 156 std::vector<float> output_ref(output_pixels() * channels()); 157 std::vector<uint32_t> index_ref(output_pixels() * channels()); 158 for (size_t iteration = 0; iteration < iterations(); iteration++) { 159 std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); 160 std::fill(output.begin(), output.end(), nanf("")); 161 162 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) { 163 indirect_input[i] = input.data() + i * channels() - input_offset(); 164 } 165 std::shuffle(indirect_input.begin(), 166 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng); 167 168 // Compute reference results, without clamping. 169 for (size_t x = 0; x < output_pixels(); x++) { 170 for (size_t c = 0; c < channels(); c++) { 171 float max_value = indirect_input[x * step()][c + input_offset()]; 172 uint32_t max_index = 0; 173 for (size_t p = 0; p < pooling_elements(); p++) { 174 const float value = indirect_input[x * step() + p][c + input_offset()]; 175 if (value > max_value) { 176 max_value = value; 177 max_index = p; 178 } 179 } 180 output_ref[x * channels() + c] = max_value; 181 index_ref[x * channels() + c] = max_index; 182 } 183 } 184 185 // Call optimized micro-kernel. 186 argmaxpool(output_pixels(), pooling_elements(), channels(), 187 indirect_input.data(), input_offset() * sizeof(float), output.data(), index.data(), 188 step() * sizeof(void*), 189 (output_stride() - channels()) * sizeof(float)); 190 191 // Verify results. 192 for (size_t x = 0; x < output_pixels(); x++) { 193 for (size_t c = 0; c < channels(); c++) { 194 ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c]) 195 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 196 << ", pooling elements = " << pooling_elements() << ", step = " << step() 197 << ", input offset = " << input_offset(); 198 ASSERT_EQ( 199 indirect_input[x * step() + index_ref[x * channels() + c]][c + input_offset()], 200 indirect_input[x * step() + index[x * channels() + c]][c + input_offset()]) 201 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 202 << ", pooling elements = " << pooling_elements() << ", step = " << step() 203 << ", input offset = " << input_offset(); 204 ASSERT_EQ(index_ref[x * channels() + c], index[x * channels() + c]) 205 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 206 << ", pooling elements = " << pooling_elements() << ", step = " << step() 207 << ", input offset = " << input_offset(); 208 } 209 } 210 } 211 } 212 213 void Test(xnn_f32_argmaxpool_multipass_ukernel_function argmaxpool, Variant variant = Variant::Native) const { 214 std::random_device random_device; 215 auto rng = std::mt19937(random_device()); 216 std::uniform_real_distribution<float> f32dist; 217 218 std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements()); 219 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 220 ((output_pixels() - 1) * step() + pooling_elements()) * channels()); 221 std::vector<float> output((output_pixels() - 1) * output_stride() + channels()); 222 std::vector<uint32_t> index(output_pixels() * channels()); 223 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> index_buffer( 224 channels() + XNN_EXTRA_BYTES / sizeof(uint32_t)); 225 std::vector<float, AlignedAllocator<float, 64>> output_buffer( 226 channels() + XNN_EXTRA_BYTES / sizeof(float)); 227 std::vector<float> output_ref(output_pixels() * channels()); 228 std::vector<uint32_t> index_ref(output_pixels() * channels()); 229 for (size_t iteration = 0; iteration < iterations(); iteration++) { 230 std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); 231 std::fill(output.begin(), output.end(), nanf("")); 232 233 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) { 234 indirect_input[i] = input.data() + i * channels() - input_offset(); 235 } 236 std::shuffle(indirect_input.begin(), 237 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng); 238 239 // Compute reference results, without clamping. 240 for (size_t x = 0; x < output_pixels(); x++) { 241 for (size_t c = 0; c < channels(); c++) { 242 float max_value = indirect_input[x * step()][c + input_offset()]; 243 uint32_t max_index = 0; 244 for (size_t p = 0; p < pooling_elements(); p++) { 245 const float value = indirect_input[x * step() + p][c + input_offset()]; 246 if (value > max_value) { 247 max_value = value; 248 max_index = p; 249 } 250 } 251 output_ref[x * channels() + c] = max_value; 252 index_ref[x * channels() + c] = max_index; 253 } 254 } 255 256 // Call optimized micro-kernel. 257 argmaxpool(output_pixels(), pooling_elements(), channels(), 258 indirect_input.data(), input_offset() * sizeof(float), 259 output_buffer.data(), index_buffer.data(), 260 output.data(), index.data(), 261 (step() - (packed_pooling_elements() - incremental_pooling_tile())) * sizeof(void*), 262 (output_stride() - channels()) * sizeof(float)); 263 264 // Verify results. 265 for (size_t x = 0; x < output_pixels(); x++) { 266 for (size_t c = 0; c < channels(); c++) { 267 ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c]) 268 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 269 << ", pooling elements = " << pooling_elements() << ", step = " << step() 270 << ", input offset = " << input_offset(); 271 ASSERT_EQ( 272 indirect_input[x * step() + index_ref[x * channels() + c]][c + input_offset()], 273 indirect_input[x * step() + index[x * channels() + c]][c + input_offset()]) 274 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 275 << ", pooling elements = " << pooling_elements() << ", step = " << step() 276 << ", input offset = " << input_offset(); 277 ASSERT_EQ(index_ref[x * channels() + c], index[x * channels() + c]) 278 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 279 << ", pooling elements = " << pooling_elements() << ", step = " << step() 280 << ", input offset = " << input_offset(); 281 } 282 } 283 } 284 } 285 286 private: 287 size_t output_pixels_{1}; 288 size_t pooling_elements_{1}; 289 size_t channels_{1}; 290 size_t input_offset_{0}; 291 size_t step_{1}; 292 size_t primary_pooling_tile_{1}; 293 size_t incremental_pooling_tile_{1}; 294 size_t output_stride_{0}; 295 size_t iterations_{3}; 296 }; 297