// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/microfnptr.h>


class MaxPoolMicrokernelTester {
 public:
  inline MaxPoolMicrokernelTester& output_pixels(size_t output_pixels) {
    assert(output_pixels != 0);
    this->output_pixels_ = output_pixels;
    return *this;
  }

  inline size_t output_pixels() const {
    return this->output_pixels_;
  }

  inline MaxPoolMicrokernelTester& step(size_t step) {
    assert(step != 0);
    this->step_ = step;
    return *this;
  }

  inline size_t step() const {
    return this->step_;
  }

  inline MaxPoolMicrokernelTester& input_offset(size_t input_offset) {
    assert(input_offset != 0);
    this->input_offset_ = input_offset;
    return *this;
  }

  inline size_t input_offset() const {
    return this->input_offset_;
  }

  inline MaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) {
    assert(pooling_elements != 0);
    this->pooling_elements_ = pooling_elements;
    return *this;
  }

  inline size_t pooling_elements() const {
    return this->pooling_elements_;
  }

  inline size_t packed_pooling_elements() const {
    if (pooling_elements() <= primary_pooling_tile()) {
      return primary_pooling_tile();
    } else {
      return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ?
        pooling_elements() :
        ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile();
    }
  }

  inline MaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) {
    assert(primary_tile != 0);
    this->primary_pooling_tile_ = primary_tile;
    this->incremental_pooling_tile_ = incremental_tile;
    return *this;
  }

  inline MaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) {
    assert(primary_pooling_tile != 0);
    this->primary_pooling_tile_ = primary_pooling_tile;
    return *this;
  }

  inline size_t primary_pooling_tile() const {
    return this->primary_pooling_tile_;
  }

  inline MaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) {
    assert(incremental_pooling_tile != 0);
    this->incremental_pooling_tile_ = incremental_pooling_tile;
    return *this;
  }

  inline size_t incremental_pooling_tile() const {
    return this->incremental_pooling_tile_;
  }

  inline MaxPoolMicrokernelTester& channels(size_t channels) {
    assert(channels != 0);
    this->channels_ = channels;
    return *this;
  }

  inline size_t channels() const {
    return this->channels_;
  }

  inline MaxPoolMicrokernelTester& output_stride(size_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline size_t output_stride() const {
    if (this->output_stride_ == 0) {
      return channels();
    } else {
      assert(this->output_stride_ >= channels());
      return this->output_stride_;
    }
  }

  inline MaxPoolMicrokernelTester& qmin(int16_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline int16_t qmin() const {
    return this->qmin_;
  }

  inline MaxPoolMicrokernelTester& qmax(int16_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline int16_t qmax() const {
    return this->qmax_;
  }

  inline MaxPoolMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void Test(xnn_s8_maxpool_ukernel_function maxpool, xnn_init_s8_minmax_params_fn init_params) const {
    ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
    ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
    ASSERT_LT(qmin(), qmax());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> i8dist(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());

    std::vector<const int8_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      indirect_input.size() * channels());
    std::vector<int8_t> output(XNN_EXTRA_BYTES / sizeof(int8_t) +
      (output_pixels() - 1) * output_stride() + channels());
    std::vector<int8_t> output_ref(output_pixels() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      do {
        std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
      } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend()));
      std::fill(output.begin(), output.end(), INT8_C(0xA5));

      for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
        indirect_input[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirect_input.begin(),
        indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);

      // Prepare parameters.
      xnn_s8_minmax_params params;
      init_params(&params, static_cast<int8_t>(qmin()), static_cast<int8_t>(qmax()));

      // Compute reference results.
      for (size_t x = 0; x < output_pixels(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          int8_t max_value = std::numeric_limits<int8_t>::min();
          for (size_t p = 0; p < pooling_elements(); p++) {
            max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]);
          }
          max_value = std::min(max_value, static_cast<int8_t>(qmax()));
          max_value = std::max(max_value, static_cast<int8_t>(qmin()));
          output_ref[x * channels() + c] = max_value;
        }
      }

      // Call optimized micro-kernel.
      maxpool(output_pixels(), pooling_elements(), channels(),
        indirect_input.data(), input_offset() * sizeof(int8_t), output.data(),
        (step() - packed_pooling_elements()) * sizeof(void*),
        (output_stride() - channels()) * sizeof(int8_t),
        &params);

      // Verify results.
      for (size_t x = 0; x < output_pixels(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_GE(int16_t(output[x * output_stride() + c]), qmin())
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
          ASSERT_LE(int16_t(output[x * output_stride() + c]), qmax())
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
          ASSERT_EQ(int32_t(output_ref[x * channels() + c]), int32_t(output[x * output_stride() + c]))
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
        }
      }
    }
  }

  void Test(xnn_u8_maxpool_ukernel_function maxpool, xnn_init_u8_minmax_params_fn init_params) const {
    ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
    ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
    ASSERT_LT(qmin(), qmax());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<const uint8_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      indirect_input.size() * channels());
    std::vector<uint8_t> output(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (output_pixels() - 1) * output_stride() + channels());
    std::vector<uint8_t> output_ref(output_pixels() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      do {
        std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend()));
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
        indirect_input[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirect_input.begin(),
        indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);

      // Prepare parameters.
      xnn_u8_minmax_params params;
      init_params(&params, static_cast<uint8_t>(qmin()), static_cast<uint8_t>(qmax()));

      // Compute reference results.
      for (size_t x = 0; x < output_pixels(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          uint8_t max_value = 0;
          for (size_t p = 0; p < pooling_elements(); p++) {
            max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]);
          }
          max_value = std::min(max_value, static_cast<uint8_t>(qmax()));
          max_value = std::max(max_value, static_cast<uint8_t>(qmin()));
          output_ref[x * channels() + c] = max_value;
        }
      }

      // Call optimized micro-kernel.
      maxpool(output_pixels(), pooling_elements(), channels(),
        indirect_input.data(), input_offset() * sizeof(uint8_t), output.data(),
        (step() - packed_pooling_elements()) * sizeof(void*),
        (output_stride() - channels()) * sizeof(uint8_t),
        &params);

      // Verify results.
      for (size_t x = 0; x < output_pixels(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_GE(int16_t(output[x * output_stride() + c]), qmin())
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
          ASSERT_LE(int16_t(output[x * output_stride() + c]), qmax())
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
          ASSERT_EQ(int32_t(output_ref[x * channels() + c]), int32_t(output[x * output_stride() + c]))
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
        }
      }
    }
  }

  void Test(xnn_f16_maxpool_ukernel_function maxpool, xnn_init_f16_minmax_params_fn init_params) const {
    ASSERT_LT(qmin(), qmax());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<const uint16_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
      ((output_pixels() - 1) * step() + pooling_elements()) * channels());
    std::vector<uint16_t> output(XNN_EXTRA_BYTES / sizeof(uint16_t) +
      (output_pixels() - 1) * output_stride() + channels());
    std::vector<float> output_ref(output_pixels() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
        indirect_input[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirect_input.begin(),
        indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);

      // Compute reference results, without clamping.
      for (size_t x = 0; x < output_pixels(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          float max_value = -std::numeric_limits<float>::infinity();
          for (size_t p = 0; p < pooling_elements(); p++) {
            max_value = std::max(max_value, fp16_ieee_to_fp32_value(indirect_input[x * step() + p][c + input_offset()]));
          }
          output_ref[x * channels() + c] = max_value;
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      float output_min = accumulated_min + accumulated_range *
        (static_cast<float>(qmin() - std::numeric_limits<int16_t>::min()) /
         static_cast<float>(std::numeric_limits<int16_t>::max() - std::numeric_limits<int16_t>::min()));
      if (qmin() == std::numeric_limits<int16_t>::min()) {
        output_min = -std::numeric_limits<float>::infinity();
      }
      float output_max = accumulated_max - accumulated_range *
        (static_cast<float>(std::numeric_limits<int16_t>::max() - qmax()) /
         static_cast<float>(std::numeric_limits<int16_t>::max() - std::numeric_limits<int16_t>::min()));
      if (qmax() == std::numeric_limits<int16_t>::max()) {
        output_max = +std::numeric_limits<float>::infinity();
      }
      output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
      output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));

      // Prepare parameters.
      xnn_f16_minmax_params params;
      init_params(&params, fp16_ieee_from_fp32_value(output_min), fp16_ieee_from_fp32_value(output_max));

      // Clamp reference results.
      for (float& output_value : output_ref) {
        output_value = std::max(std::min(output_value, output_max), output_min);
      }

      // Call optimized micro-kernel.
      maxpool(output_pixels(), pooling_elements(), channels(),
        reinterpret_cast<const void**>(indirect_input.data()), input_offset() * sizeof(uint16_t), output.data(),
        (step() - packed_pooling_elements()) * sizeof(void*),
        (output_stride() - channels()) * sizeof(uint16_t),
        &params);

      // Verify results.
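      // Outputs are checked for exact equality against the clamped reference:
      // max pooling only ever selects an existing fp16-representable input
      // value or one of the fp16-rounded clamping bounds, so no numerical
      // tolerance is needed.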
      for (size_t x = 0; x < output_pixels(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_GE(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_min)
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
          ASSERT_LE(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_max)
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
          ASSERT_EQ(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_ref[x * channels() + c])
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
        }
      }
    }
  }

  void Test(xnn_f32_maxpool_ukernel_function maxpool, xnn_init_f32_minmax_params_fn init_params) const {
    ASSERT_LT(qmin(), qmax());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      ((output_pixels() - 1) * step() + pooling_elements()) * channels());
    std::vector<float> output(XNN_EXTRA_BYTES / sizeof(float) +
      (output_pixels() - 1) * output_stride() + channels());
    std::vector<float> output_ref(output_pixels() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), nanf(""));

      for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
        indirect_input[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirect_input.begin(),
        indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);

      // Compute reference results, without clamping.
      for (size_t x = 0; x < output_pixels(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          float max_value = -std::numeric_limits<float>::infinity();
          for (size_t p = 0; p < pooling_elements(); p++) {
            max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]);
          }
          output_ref[x * channels() + c] = max_value;
        }
      }

      // Compute clamping parameters.
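      // qmin()/qmax() are expressed on the int16 scale; map them linearly onto
      // the observed range of the unclamped reference outputs to derive float
      // clamping bounds, and leave a bound unconstrained (infinite) when the
      // corresponding quantized limit sits at the extreme of the int16 range.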
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      float output_min = accumulated_min + accumulated_range *
        (static_cast<float>(qmin() - std::numeric_limits<int16_t>::min()) /
         static_cast<float>(std::numeric_limits<int16_t>::max() - std::numeric_limits<int16_t>::min()));
      if (qmin() == std::numeric_limits<int16_t>::min()) {
        output_min = -std::numeric_limits<float>::infinity();
      }
      float output_max = accumulated_max - accumulated_range *
        (static_cast<float>(std::numeric_limits<int16_t>::max() - qmax()) /
         static_cast<float>(std::numeric_limits<int16_t>::max() - std::numeric_limits<int16_t>::min()));
      if (qmax() == std::numeric_limits<int16_t>::max()) {
        output_max = +std::numeric_limits<float>::infinity();
      }

      // Prepare parameters.
      xnn_f32_minmax_params params;
      init_params(&params, output_min, output_max);

      // Clamp reference results.
      for (float& output_value : output_ref) {
        output_value = std::max(std::min(output_value, output_max), output_min);
      }

      // Call optimized micro-kernel.
      maxpool(output_pixels(), pooling_elements(), channels(),
        indirect_input.data(), input_offset() * sizeof(float), output.data(),
        (step() - packed_pooling_elements()) * sizeof(void*),
        (output_stride() - channels()) * sizeof(float),
        &params);

      // Verify results.
      for (size_t x = 0; x < output_pixels(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_GE(output[x * output_stride() + c], output_min)
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
          ASSERT_LE(output[x * output_stride() + c], output_max)
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
          ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c])
            << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
            << ", pooling elements = " << pooling_elements() << ", step = " << step()
            << ", input offset = " << input_offset();
        }
      }
    }
  }

 private:
  size_t output_pixels_{1};
  size_t pooling_elements_{1};
  size_t channels_{1};
  size_t input_offset_{0};
  size_t step_{1};
  size_t primary_pooling_tile_{1};
  size_t incremental_pooling_tile_{1};
  size_t output_stride_{0};
  int16_t qmin_{std::numeric_limits<int16_t>::min()};
  int16_t qmax_{std::numeric_limits<int16_t>::max()};
  size_t iterations_{3};
};
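
// Minimal usage sketch (illustration only, not part of the tester): a
// GoogleTest case configures the builder-style setters and calls Test() with
// the microkernel under test plus its parameter-initialization function. The
// kernel/init names below (xnn_f32_maxpool_minmax_ukernel_9p8x__scalar_c1 and
// xnn_init_f32_minmax_scalar_params) are illustrative assumptions; substitute
// the actual pair being tested.
//
//   TEST(F32_MAXPOOL_9P8X__SCALAR_C1, channels_eq_1_unipass_fulltile) {
//     MaxPoolMicrokernelTester()
//       .pooling_elements(9)
//       .pooling_tile(9, 8)
//       .channels(1)
//       .Test(xnn_f32_maxpool_minmax_ukernel_9p8x__scalar_c1,
//             xnn_init_f32_minmax_scalar_params);
//   }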