// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>

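// Test harness for XNNPACK Convert (NC-layout data-type conversion)
// operators. Parameters are configured through chained setters; each
// Test*() method creates the corresponding Convert operator, runs it on
// randomly generated input, and verifies every output element against a
// scalar reference computation.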
class ConvertOperatorTester {
 public:
  inline ConvertOperatorTester& channels(size_t channels) {
    assert(channels != 0);
    this->channels_ = channels;
    return *this;
  }

  inline size_t channels() const {
    return this->channels_;
  }

  inline ConvertOperatorTester& input_stride(size_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline size_t input_stride() const {
    if (this->input_stride_ == 0) {
      return this->channels_;
    } else {
      assert(this->input_stride_ >= this->channels_);
      return this->input_stride_;
    }
  }

  inline ConvertOperatorTester& output_stride(size_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline size_t output_stride() const {
    if (this->output_stride_ == 0) {
      return this->channels_;
    } else {
      assert(this->output_stride_ >= this->channels_);
      return this->output_stride_;
    }
  }

  inline ConvertOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ConvertOperatorTester& scale(float scale) {
    assert(scale >= 0.0f);
    assert(std::isnormal(scale));
    this->scale_ = scale;
    return *this;
  }

  inline float scale() const {
    return this->scale_;
  }

  inline ConvertOperatorTester& zero_point(int16_t zero_point) {
    this->zero_point_ = zero_point;
    return *this;
  }

  inline int16_t zero_point() const {
    return this->zero_point_;
  }

  inline ConvertOperatorTester& qmin(int16_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline int16_t qmin() const {
    return this->qmin_;
  }

  inline ConvertOperatorTester& qmax(int16_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline int16_t qmax() const {
    return this->qmax_;
  }

  inline ConvertOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void TestF16toF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
      (batch_size() - 1) * input_stride() + channels());
    std::vector<float> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          output_ref[i * channels() + c] = fp16_ieee_to_fp32_value(input[i * input_stride() + c]);
        }
      }

      // Create, setup, run, and destroy Convert operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convert_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convert_nc_f16_f32(
          channels(), input_stride(), output_stride(),
          0, &convert_op));
      ASSERT_NE(nullptr, convert_op);

      // Smart pointer to automatically delete convert op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convert_nc_f16_f32(
          convert_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convert_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c])
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void TestF32toF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      (batch_size() - 1) * input_stride() + channels());
    std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<uint16_t> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          output_ref[i * channels() + c] = fp16_ieee_from_fp32_value(input[i * input_stride() + c]);
        }
      }

      // Create, setup, run, and destroy Convert operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convert_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convert_nc_f32_f16(
          channels(), input_stride(), output_stride(),
          0, &convert_op));
      ASSERT_NE(nullptr, convert_op);

      // Smart pointer to automatically delete convert op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convert_nc_f32_f16(
          convert_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convert_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c])
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

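  // The two tests below verify float -> quantized conversion against a
  // scalar reference: x / scale is clamped to
  // [qmin - zero_point, qmax - zero_point], rounded to the nearest integer,
  // and then offset by zero_point. qmin_/qmax_ default to the full int16
  // range, so callers must first narrow them to the range of the 8-bit
  // target type; the leading ASSERTs enforce this.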
  void TestF32toQS8() const {
    ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
    ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
    ASSERT_LT(qmin(), qmax());

    ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
    ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      (batch_size() - 1) * input_stride() + channels());
    std::vector<int8_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<int8_t> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), INT8_C(0xA5));

      // Compute reference results.
      const float inv_scale = 1.0f / scale();
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          float scaled_input = input[i * input_stride() + c] * inv_scale;
          scaled_input = std::min<float>(scaled_input, float(qmax() - zero_point()));
          scaled_input = std::max<float>(scaled_input, float(qmin() - zero_point()));
          output_ref[i * channels() + c] = int8_t(std::lrintf(scaled_input) + long(zero_point()));
        }
      }

      // Create, setup, run, and destroy Convert operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convert_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convert_nc_f32_qs8(
          channels(), input_stride(), output_stride(),
          scale(), int8_t(zero_point()), int8_t(qmin()), int8_t(qmax()),
          0, &convert_op));
      ASSERT_NE(nullptr, convert_op);

      // Smart pointer to automatically delete convert op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convert_nc_f32_qs8(
          convert_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convert_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(int32_t(output_ref[i * channels() + c]), int32_t(output[i * output_stride() + c]))
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void TestF32toQU8() const {
    ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
    ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
    ASSERT_LT(qmin(), qmax());

    ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
    ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      (batch_size() - 1) * input_stride() + channels());
    std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<uint8_t> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Compute reference results.
      const float inv_scale = 1.0f / scale();
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          float scaled_input = input[i * input_stride() + c] * inv_scale;
          scaled_input = std::min<float>(scaled_input, float(qmax() - zero_point()));
          scaled_input = std::max<float>(scaled_input, float(qmin() - zero_point()));
          output_ref[i * channels() + c] = uint8_t(std::lrintf(scaled_input) + long(zero_point()));
        }
      }

      // Create, setup, run, and destroy Convert operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convert_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convert_nc_f32_qu8(
          channels(), input_stride(), output_stride(),
          scale(), uint8_t(zero_point()), uint8_t(qmin()), uint8_t(qmax()),
          0, &convert_op));
      ASSERT_NE(nullptr, convert_op);

      // Smart pointer to automatically delete convert op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convert_nc_f32_qu8(
          convert_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convert_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(uint32_t(output_ref[i * channels() + c]), uint32_t(output[i * output_stride() + c]))
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

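  // The two tests below verify quantized -> float conversion against the
  // reference dequantization x = float(q - zero_point) * scale.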
  void TestQS8toF32() const {
    ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
    ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> i8dist(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      (batch_size() - 1) * input_stride() + channels());
    std::vector<float> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          output_ref[i * channels() + c] = float(input[i * input_stride() + c] - zero_point()) * scale();
        }
      }

      // Create, setup, run, and destroy Convert operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convert_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convert_nc_qs8_f32(
          channels(), input_stride(), output_stride(),
          scale(), int8_t(zero_point()),
          0, &convert_op));
      ASSERT_NE(nullptr, convert_op);

      // Smart pointer to automatically delete convert op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convert_nc_qs8_f32(
          convert_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convert_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c])
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void TestQU8toF32() const {
    ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
    ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (batch_size() - 1) * input_stride() + channels());
    std::vector<float> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          output_ref[i * channels() + c] = float(input[i * input_stride() + c] - zero_point()) * scale();
        }
      }

      // Create, setup, run, and destroy Convert operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convert_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convert_nc_qu8_f32(
          channels(), input_stride(), output_stride(),
          scale(), uint8_t(zero_point()),
          0, &convert_op));
      ASSERT_NE(nullptr, convert_op);

      // Smart pointer to automatically delete convert op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convert_nc_qu8_f32(
          convert_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convert_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c])
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

 private:
  size_t batch_size_{1};
  size_t channels_{1};
  size_t input_stride_{0};
  size_t output_stride_{0};
  float scale_{150.0f};
  int16_t zero_point_{1};
  int16_t qmin_{std::numeric_limits<int16_t>::min()};
  int16_t qmax_{std::numeric_limits<int16_t>::max()};
  size_t iterations_{15};
};
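
// Minimal usage sketch (hypothetical test case; the suite and test names
// below are illustrative and not defined by this header). QS8/QU8 tests
// require qmin/qmax to be narrowed from the int16 defaults to the 8-bit
// range of the target type:
//
//   TEST(CONVERT_NC_F32_QS8, unit_batch) {
//     ConvertOperatorTester()
//       .batch_size(1)
//       .channels(100)
//       .scale(1.0f / 128.0f)
//       .zero_point(1)
//       .qmin(std::numeric_limits<int8_t>::min())
//       .qmax(std::numeric_limits<int8_t>::max())
//       .TestF32toQS8();
//   }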