// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <fp16.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>


class AveragePoolingOperatorTester {
 public:
  inline AveragePoolingOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline AveragePoolingOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline AveragePoolingOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline AveragePoolingOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

  inline AveragePoolingOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline AveragePoolingOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * stride_height() + pooling_height() - input_height();
      return total_padding_height / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline AveragePoolingOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * stride_width() + pooling_width() - input_width();
      return total_padding_width / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline AveragePoolingOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * stride_height() + pooling_height() - input_height();
      return total_padding_height - total_padding_height / 2;
    } else {
      return this->padding_bottom_;
    }
  }

  inline AveragePoolingOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * stride_width() + pooling_width() - input_width();
      return total_padding_width - total_padding_width / 2;
    } else {
      return this->padding_right_;
    }
  }

  inline AveragePoolingOperatorTester& input_size(size_t input_height, size_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline AveragePoolingOperatorTester& input_height(size_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline size_t input_height() const {
    return this->input_height_;
  }

  inline AveragePoolingOperatorTester& input_width(size_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline size_t input_width() const {
    return this->input_width_;
  }

  inline AveragePoolingOperatorTester& channels(size_t channels) {
    assert(channels != 0);
    this->channels_ = channels;
    return *this;
  }

  inline size_t channels() const {
    return this->channels_;
  }

  inline AveragePoolingOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline AveragePoolingOperatorTester& pooling_size(uint32_t pooling_size) {
    assert(pooling_size >= 1);
    this->pooling_height_ = pooling_size;
    this->pooling_width_ = pooling_size;
    return *this;
  }

  inline AveragePoolingOperatorTester& pooling_size(uint32_t pooling_height, uint32_t pooling_width) {
    assert(pooling_height >= 1);
    assert(pooling_width >= 1);
    this->pooling_height_ = pooling_height;
    this->pooling_width_ = pooling_width;
    return *this;
  }

  inline AveragePoolingOperatorTester& pooling_height(uint32_t pooling_height) {
    assert(pooling_height >= 1);
    this->pooling_height_ = pooling_height;
    return *this;
  }

  inline uint32_t pooling_height() const {
    return this->pooling_height_;
  }

  inline AveragePoolingOperatorTester& pooling_width(uint32_t pooling_width) {
    assert(pooling_width >= 1);
    this->pooling_width_ = pooling_width;
    return *this;
  }

  inline uint32_t pooling_width() const {
    return this->pooling_width_;
  }
  inline AveragePoolingOperatorTester& stride(uint32_t stride) {
    assert(stride >= 1);
    this->stride_height_ = stride;
    this->stride_width_ = stride;
    return *this;
  }

  inline AveragePoolingOperatorTester& stride(uint32_t stride_height, uint32_t stride_width) {
    assert(stride_height >= 1);
    assert(stride_width >= 1);
    this->stride_height_ = stride_height;
    this->stride_width_ = stride_width;
    return *this;
  }

  inline AveragePoolingOperatorTester& stride_height(uint32_t stride_height) {
    assert(stride_height >= 1);
    this->stride_height_ = stride_height;
    return *this;
  }

  inline uint32_t stride_height() const {
    return this->stride_height_;
  }

  inline AveragePoolingOperatorTester& stride_width(uint32_t stride_width) {
    assert(stride_width >= 1);
    this->stride_width_ = stride_width;
    return *this;
  }

  inline uint32_t stride_width() const {
    return this->stride_width_;
  }

  inline size_t output_height() const {
    if (padding_tf_same()) {
      return (input_height() + stride_height() - 1) / stride_height();
    } else {
      const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
      if (padded_input_height <= pooling_height()) {
        return 1;
      } else {
        return (padded_input_height - pooling_height()) / stride_height() + 1;
      }
    }
  }

  inline size_t output_width() const {
    if (padding_tf_same()) {
      return (input_width() + stride_width() - 1) / stride_width();
    } else {
      const size_t padded_input_width = padding_left() + input_width() + padding_right();
      if (padded_input_width <= pooling_width()) {
        return 1;
      } else {
        return (padded_input_width - pooling_width()) / stride_width() + 1;
      }
    }
  }

  inline AveragePoolingOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
    assert(input_pixel_stride != 0);
    this->input_pixel_stride_ = input_pixel_stride;
    return *this;
  }

  inline size_t input_pixel_stride() const {
    if (this->input_pixel_stride_ == 0) {
      return channels();
    } else {
      assert(this->input_pixel_stride_ >= channels());
      return this->input_pixel_stride_;
    }
  }

  inline AveragePoolingOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
    assert(output_pixel_stride != 0);
    this->output_pixel_stride_ = output_pixel_stride;
    return *this;
  }

  inline size_t output_pixel_stride() const {
    if (this->output_pixel_stride_ == 0) {
      return channels();
    } else {
      assert(this->output_pixel_stride_ >= channels());
      return this->output_pixel_stride_;
    }
  }

  inline AveragePoolingOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline AveragePoolingOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline AveragePoolingOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    const size_t padded_next_input_height = padding_top() + next_input_height() + padding_bottom();
    if (padded_next_input_height <= pooling_height()) {
      return 1;
    } else {
      return (padded_next_input_height - pooling_height()) / stride_height() + 1;
    }
  }

  inline size_t next_output_width() const {
    const size_t padded_next_input_width = padding_left() + next_input_width() + padding_right();
    if (padded_next_input_width <= pooling_width()) {
      return 1;
    } else {
      return (padded_next_input_width - pooling_width()) / stride_width() + 1;
    }
  }

  inline AveragePoolingOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline AveragePoolingOperatorTester& input_scale(float input_scale) {
    assert(input_scale > 0.0f);
    assert(std::isnormal(input_scale));
    this->input_scale_ = input_scale;
    return *this;
  }

  inline float input_scale() const {
    return this->input_scale_;
  }

  inline AveragePoolingOperatorTester& input_zero_point(uint8_t input_zero_point) {
    this->input_zero_point_ = input_zero_point;
    return *this;
  }

  inline uint8_t input_zero_point() const {
    return this->input_zero_point_;
  }

  inline AveragePoolingOperatorTester& output_scale(float output_scale) {
    assert(output_scale > 0.0f);
    assert(std::isnormal(output_scale));
    this->output_scale_ = output_scale;
    return *this;
  }

  inline float output_scale() const {
    return this->output_scale_;
  }

  inline AveragePoolingOperatorTester& output_zero_point(uint8_t output_zero_point) {
    this->output_zero_point_ = output_zero_point;
    return *this;
  }

  inline uint8_t output_zero_point() const {
    return this->output_zero_point_;
  }

  inline AveragePoolingOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline AveragePoolingOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }
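
  // Number of test iterations: each Test* method below regenerates random
  // inputs and re-runs the operator this many times.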
  inline AveragePoolingOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void TestF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<uint16_t> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels());
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              float acc = 0.0f;
              int32_t n = 0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < input_width() && iy < input_height()) {
                    acc += fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
                    n += 1;
                  }
                }
              }
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = acc / float(n);
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
      float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
      output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
      output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));
      if (accumulated_range == 0.0f) {
        output_min = -std::numeric_limits<float>::infinity();
        output_max = +std::numeric_limits<float>::infinity();
      }
      if (qmin() == std::numeric_limits<uint8_t>::min()) {
        output_min = -std::numeric_limits<float>::infinity();
      }
      if (qmax() == std::numeric_limits<uint8_t>::max()) {
        output_max = +std::numeric_limits<float>::infinity();
      }

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Average Pooling operator.
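      // FP16 may be unsupported on the host: operator creation is checked for
      // xnn_status_unsupported_hardware, and the test is skipped in that case.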
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t average_pooling_op = nullptr;

      const xnn_status status = xnn_create_average_pooling2d_nhwc_f16(
        padding_top(), padding_right(), padding_bottom(), padding_left(),
        pooling_height(), pooling_width(),
        stride_height(), stride_width(),
        channels(), input_pixel_stride(), output_pixel_stride(),
        output_min, output_max,
        0, &average_pooling_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, average_pooling_op);

      // Smart pointer to automatically delete average_pooling_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_average_pooling_op(average_pooling_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_f16(
          average_pooling_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_max);
              ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_min);
              ASSERT_NEAR(
                  fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]),
                  output_ref[((i * output_height() + y) * output_width() + x) * channels() + c],
                  std::max(1.0e-3f, std::abs(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]) * 1.0e-2f)) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }
    }
  }

  void TestF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<float> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels());
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results, without clamping.
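      // Padded positions are skipped implicitly: iy and ix are computed in
      // unsigned arithmetic, so window positions above or to the left of the
      // input wrap around to huge values and fail the bounds checks below.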
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              float acc = 0.0f;
              int32_t n = 0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < input_width() && iy < input_height()) {
                    acc += input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c];
                    n += 1;
                  }
                }
              }
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = acc / float(n);
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_range == 0.0f ?
        -std::numeric_limits<float>::infinity() :
        accumulated_min + accumulated_range / 255.0f * float(qmin());
      const float output_max = accumulated_range == 0.0f ?
        +std::numeric_limits<float>::infinity() :
        accumulated_max - accumulated_range / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Average Pooling operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t average_pooling_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_average_pooling2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          pooling_height(), pooling_width(),
          stride_height(), stride_width(),
          channels(), input_pixel_stride(), output_pixel_stride(),
          output_min, output_max,
          0, &average_pooling_op));
      ASSERT_NE(nullptr, average_pooling_op);

      // Smart pointer to automatically delete average_pooling_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_average_pooling_op(average_pooling_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_f32(
          average_pooling_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      // Verify results.
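      // Each output element must respect the clamping bounds and match the
      // reference value within a 1.0e-6 relative tolerance.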
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_max);
              ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_min);
              ASSERT_NEAR(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c],
                  output_ref[((i * output_height() + y) * output_width() + x) * channels() + c],
                  std::abs(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]) * 1.0e-6f) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }
    }
  }

  void TestQU8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<uint8_t> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels());
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Compute reference results.
      const double scale = double(input_scale()) / (double(output_scale()) * double(pooling_height() * pooling_width()));
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              double acc = 0.0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < input_width() && iy < input_height()) {
                    acc += double(int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]) - int32_t(input_zero_point()));
                  }
                }
              }
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = float(acc * scale + double(output_zero_point()));
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] =
                std::min<float>(output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c], float(qmax()));
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] =
                std::max<float>(output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c], float(qmin()));
            }
          }
        }
      }

      // Create, setup, run, and destroy Average Pooling operator.
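      // The QU8 operator takes the quantization parameters (zero points and
      // scales) at creation time, together with the [qmin, qmax] output clamp.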
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t average_pooling_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_average_pooling2d_nhwc_qu8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          pooling_height(), pooling_width(),
          stride_height(), stride_width(),
          channels(), input_pixel_stride(), output_pixel_stride(),
          input_zero_point(), input_scale(),
          output_zero_point(), output_scale(),
          qmin(), qmax(),
          0, &average_pooling_op));
      ASSERT_NE(nullptr, average_pooling_op);

      // Smart pointer to automatically delete average_pooling_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_average_pooling_op(average_pooling_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_qu8(
          average_pooling_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
              ASSERT_GE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
              ASSERT_NEAR(float(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])),
                  output_ref[((i * output_height() + y) * output_width() + x) * channels() + c], 0.80f) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }
    }
  }

  void TestSetupF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max<size_t>(
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
      (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
    std::vector<uint16_t> output(std::max<size_t>(
      (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
      (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              float acc = 0.0f;
              size_t n = 0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < input_width() && iy < input_height()) {
                    acc += fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
                    n += 1;
                  }
                }
              }
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = acc / float(n);
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
      float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
      output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
      output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));
      if (accumulated_range == 0.0f) {
        output_min = -std::numeric_limits<float>::infinity();
        output_max = +std::numeric_limits<float>::infinity();
      }
      if (qmin() == std::numeric_limits<uint8_t>::min()) {
        output_min = -std::numeric_limits<float>::infinity();
      }
      if (qmax() == std::numeric_limits<uint8_t>::max()) {
        output_max = +std::numeric_limits<float>::infinity();
      }

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Average Pooling operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t average_pooling_op = nullptr;

      const xnn_status status = xnn_create_average_pooling2d_nhwc_f16(
        padding_top(), padding_right(), padding_bottom(), padding_left(),
        pooling_height(), pooling_width(),
        stride_height(), stride_width(),
        channels(), input_pixel_stride(), output_pixel_stride(),
        output_min, output_max,
        0, &average_pooling_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, average_pooling_op);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_f16(
          average_pooling_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      // Verify results of the first run.
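      // FP16 comparisons use a mixed tolerance: at least 1.0e-3 absolute, or
      // 1% relative to the reference value, whichever is larger.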
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_max);
              ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_min);
              ASSERT_NEAR(
                  fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]),
                  output_ref[((i * output_height() + y) * output_width() + x) * channels() + c],
                  std::max(1.0e-3f, std::abs(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]) * 1.0e-2f)) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results for the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              float acc = 0.0f;
              int32_t n = 0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < next_input_width() && iy < next_input_height()) {
                    acc += fp16_ieee_to_fp32_value(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]);
                    n += 1;
                  }
                }
              }
              next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] =
                std::max(std::min(acc / float(n), output_max), output_min);
            }
          }
        }
      }

      // Setup and run Average Pooling operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_f16(
          average_pooling_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_delete_operator(average_pooling_op));
      average_pooling_op = nullptr;

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), output_max);
              ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), output_min);
              ASSERT_NEAR(
                  fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]),
                  next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c],
                  std::max(1.0e-3f, std::abs(next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c]) * 1.0e-2f)) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }
    }
  }

  void TestSetupF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max<size_t>(
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
      (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
    std::vector<float> output(std::max<size_t>(
      (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
      (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              float acc = 0.0f;
              size_t n = 0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < input_width() && iy < input_height()) {
                    acc += input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c];
                    n += 1;
                  }
                }
              }
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = acc / float(n);
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_range == 0.0f ?
        -std::numeric_limits<float>::infinity() :
        accumulated_min + accumulated_range / 255.0f * float(qmin());
      const float output_max = accumulated_range == 0.0f ?
        +std::numeric_limits<float>::infinity() :
        accumulated_max - accumulated_range / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Average Pooling operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t average_pooling_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_average_pooling2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          pooling_height(), pooling_width(),
          stride_height(), stride_width(),
          channels(), input_pixel_stride(), output_pixel_stride(),
          output_min, output_max,
          0, &average_pooling_op));
      ASSERT_NE(nullptr, average_pooling_op);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_f32(
          average_pooling_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_max);
              ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_min);
              ASSERT_NEAR(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c],
                  output_ref[((i * output_height() + y) * output_width() + x) * channels() + c],
                  std::abs(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]) * 1.0e-6f) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results for the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              float acc = 0.0f;
              int32_t n = 0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < next_input_width() && iy < next_input_height()) {
                    acc += input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c];
                    n += 1;
                  }
                }
              }
              next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] =
                std::max(std::min(acc / float(n), output_max), output_min);
            }
          }
        }
      }

      // Setup and run Average Pooling operator the second time, and destroy the operator.
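      // The same operator object is set up again with the new batch size and
      // input dimensions; no re-creation is required.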
      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_f32(
          average_pooling_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_delete_operator(average_pooling_op));
      average_pooling_op = nullptr;

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c], output_max);
              ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c], output_min);
              ASSERT_NEAR(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c],
                  next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c],
                  std::abs(next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c]) * 1.0e-6f) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }
    }
  }

  void TestSetupQU8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max<size_t>(
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
      (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
    std::vector<uint8_t> output(std::max<size_t>(
      (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
      (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Compute reference results.
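      // The requantization scale folds the division by the pooling window size
      // into the input-to-output scale ratio; note that the divisor is the full
      // window size even when part of the window falls into padding.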
      const double scale = double(input_scale()) / (double(output_scale()) * double(pooling_height() * pooling_width()));
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              double acc = 0.0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < input_width() && iy < input_height()) {
                    acc += double(int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]) - int32_t(input_zero_point()));
                  }
                }
              }
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = float(acc * scale + double(output_zero_point()));
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] =
                std::min<float>(output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c], float(qmax()));
              output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] =
                std::max<float>(output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c], float(qmin()));
            }
          }
        }
      }

      // Create, setup, and run Average Pooling operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t average_pooling_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_average_pooling2d_nhwc_qu8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          pooling_height(), pooling_width(),
          stride_height(), stride_width(),
          channels(), input_pixel_stride(), output_pixel_stride(),
          input_zero_point(), input_scale(),
          output_zero_point(), output_scale(),
          qmin(), qmax(),
          0, &average_pooling_op));
      ASSERT_NE(nullptr, average_pooling_op);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_qu8(
          average_pooling_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
              ASSERT_GE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
              ASSERT_NEAR(float(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])),
                  output_ref[((i * output_height() + y) * output_width() + x) * channels() + c], 0.80f) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Compute reference results for the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t c = 0; c < channels(); c++) {
              double acc = 0.0;
              for (size_t py = 0; py < pooling_height(); py++) {
                const size_t iy = oy * stride_height() + py - padding_top();
                for (size_t px = 0; px < pooling_width(); px++) {
                  const size_t ix = ox * stride_width() + px - padding_left();
                  if (ix < next_input_width() && iy < next_input_height()) {
                    acc += double(int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]) - int32_t(input_zero_point()));
                  }
                }
              }
              next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = float(acc * scale + double(output_zero_point()));
              next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] =
                std::min<float>(next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c], float(qmax()));
              next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] =
                std::max<float>(next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c], float(qmin()));
            }
          }
        }
      }

      // Setup and run Average Pooling operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_average_pooling2d_nhwc_qu8(
          average_pooling_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(average_pooling_op, nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_delete_operator(average_pooling_op));
      average_pooling_op = nullptr;

      // Verify results of the second run.
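      // Quantized outputs may differ from the real-valued reference by at most
      // 0.80, i.e. less than one quantization step.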
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t c = 0; c < channels(); c++) {
              ASSERT_LE(uint32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
              ASSERT_GE(uint32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
              ASSERT_NEAR(float(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c])),
                  next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c], 0.80f) <<
                "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
            }
          }
        }
      }
    }
  }

 private:
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  size_t channels_{1};
  size_t batch_size_{1};
  size_t input_pixel_stride_{0};
  size_t output_pixel_stride_{0};
  uint32_t pooling_height_{1};
  uint32_t pooling_width_{1};
  uint32_t stride_height_{1};
  uint32_t stride_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  float input_scale_{1.0f};
  float output_scale_{1.0f};
  uint8_t input_zero_point_{121};
  uint8_t output_zero_point_{133};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{1};
};
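
// Example usage (a hypothetical sketch; the actual tests live in the
// average-pooling unit-test files and may configure the tester differently):
//
//   TEST(AVERAGE_POOLING_NHWC_F32, example) {
//     AveragePoolingOperatorTester()
//       .batch_size(2)
//       .input_size(8, 7)
//       .pooling_size(3, 2)
//       .stride(2)
//       .channels(19)
//       .TestF32();
//   }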