// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>


// Tester for elementwise binary (two-input) micro-kernels: fills random
// inputs, computes scalar reference results, runs the micro-kernel under
// test, and verifies its outputs with gtest assertions.
class VBinaryMicrokernelTester {
 public:
  enum class OpType {
    Add,
    Div,
    Max,
    Min,
    Mul,
    Sub,
    SqrDiff,
  };

  inline VBinaryMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VBinaryMicrokernelTester& inplace_a(bool inplace_a) {
    this->inplace_a_ = inplace_a;
    return *this;
  }

  inline bool inplace_a() const {
    return this->inplace_a_;
  }

  inline VBinaryMicrokernelTester& inplace_b(bool inplace_b) {
    this->inplace_b_ = inplace_b;
    return *this;
  }

  inline bool inplace_b() const {
    return this->inplace_b_;
  }

  inline VBinaryMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VBinaryMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VBinaryMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void Test(xnn_f16_vbinary_ukernel_function vbinary, OpType op_type) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);

    std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::generate(b.begin(), b.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* a_data = inplace_a() ? y.data() : a.data();
      const uint16_t* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
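      // References are computed from a_data/b_data rather than a/b, so that
      // in-place runs (where a_data or b_data aliases y) are checked against
      // the values the kernel actually consumed. fp16 inputs are widened to
      // fp32 for the reference arithmetic.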
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Div:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Mul:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::SqrDiff:
          {
            const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            break;
        }
      }

      // Call optimized micro-kernel.
      vbinary(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f16_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, xnn_init_f16_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);

    std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::generate(b.begin(), b.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* a_data = inplace_a() ? y.data() : a.data();
      const uint16_t* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
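      // The unclamped reference results are computed first; the [y_min, y_max]
      // clamp below maps qmin()/qmax() (a 0-255 scale) onto the observed
      // output range, and rounds the bounds through fp16 so the reference
      // clamp matches the params the kernel receives.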
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Div:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Mul:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::SqrDiff:
          {
            const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            break;
        }
      }

      const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
        (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
        +std::numeric_limits<float>::infinity()));
      const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
        (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
        -std::numeric_limits<float>::infinity()));
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
      }

      // Prepare parameters.
      xnn_f16_minmax_params params;
      init_params(&params,
        fp16_ieee_from_fp32_value(y_min), fp16_ieee_from_fp32_value(y_max));

      // Call optimized micro-kernel.
      vbinary_minmax(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_ukernel_function vbinary, OpType op_type, xnn_init_f32_default_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
      std::generate(b.begin(), b.end(), [&]() { return f32dist(rng); });
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
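      // Same reference convention as the f16 tests above, minus the fp16
      // round-trips. Note that params below are initialized and passed only
      // when an init_params callback is supplied; parameterless kernels are
      // called with nullptr.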
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }

      // Prepare parameters.
      xnn_f32_default_params params;
      if (init_params) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vbinary(batch_size() * sizeof(float), a_data, b_data, y.data(), init_params != nullptr ? &params : nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_relu_ukernel_function vbinary_relu, OpType op_type) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
      std::generate(b.begin(), b.end(), [&]() { return f32dist(rng); });
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(y_ref[i], 0.0f);
      }

      // Call optimized micro-kernel.
      vbinary_relu(batch_size() * sizeof(float), a_data, b_data, y.data(), nullptr);

      // Verify results.
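      // Besides closeness to the clamped reference, the ReLU post-condition
      // (no negative outputs) is asserted directly.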
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_GE(y[i], 0.0f)
          << "at " << i << " / " << batch_size();
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, xnn_init_f32_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
      std::generate(b.begin(), b.end(), [&]() { return f32dist(rng); });
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }
      const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float y_max = accumulated_range > 0.0f ?
        (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
        +std::numeric_limits<float>::infinity();
      const float y_min = accumulated_range > 0.0f ?
        (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
        -std::numeric_limits<float>::infinity();
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
      }

      // Prepare parameters.
      xnn_f32_minmax_params params;
      init_params(&params, y_min, y_max);

      // Call optimized micro-kernel.
      vbinary_minmax(batch_size() * sizeof(float), a_data, b_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

 private:
  size_t batch_size_{1};
  bool inplace_a_{false};
  bool inplace_b_{false};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};
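// Usage sketch: a typical gtest case drives one micro-kernel through the
// tester's fluent setters and a Test() overload. The kernel symbol below
// (xnn_f32_vadd_ukernel__scalar_x1) is illustrative only; substitute any
// declared xnn_f32_vbinary_ukernel_function with matching semantics.
//
//   TEST(F32_VADD__SCALAR_X1, batch_gt_1) {
//     for (size_t batch_size = 2; batch_size < 16; batch_size++) {
//       VBinaryMicrokernelTester()
//         .batch_size(batch_size)
//         .iterations(3)
//         .Test(xnn_f32_vadd_ukernel__scalar_x1,
//               VBinaryMicrokernelTester::OpType::Add);
//     }
//   }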