// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/requantization.h>


class VAddCMicrokernelTester {
 public:
  inline VAddCMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VAddCMicrokernelTester& inplace(bool inplace) {
    this->inplace_ = inplace;
    return *this;
  }

  inline bool inplace() const {
    return this->inplace_;
  }

  inline VAddCMicrokernelTester& a_scale(float a_scale) {
    assert(a_scale > 0.0f);
    assert(std::isnormal(a_scale));
    this->a_scale_ = a_scale;
    return *this;
  }

  inline float a_scale() const {
    return this->a_scale_;
  }

  inline VAddCMicrokernelTester& a_zero_point(uint8_t a_zero_point) {
    this->a_zero_point_ = a_zero_point;
    return *this;
  }

  inline uint8_t a_zero_point() const {
    return this->a_zero_point_;
  }

  inline VAddCMicrokernelTester& b_scale(float b_scale) {
    assert(b_scale > 0.0f);
    assert(std::isnormal(b_scale));
    this->b_scale_ = b_scale;
    return *this;
  }

  inline float b_scale() const {
    return this->b_scale_;
  }

  inline VAddCMicrokernelTester& b_zero_point(uint8_t b_zero_point) {
    this->b_zero_point_ = b_zero_point;
    return *this;
  }

  inline uint8_t b_zero_point() const {
    return this->b_zero_point_;
  }

  inline VAddCMicrokernelTester& y_scale(float y_scale) {
    assert(y_scale > 0.0f);
    assert(std::isnormal(y_scale));
    this->y_scale_ = y_scale;
    return *this;
  }

  inline float y_scale() const {
    return this->y_scale_;
  }

  inline VAddCMicrokernelTester& y_zero_point(uint8_t y_zero_point) {
    this->y_zero_point_ = y_zero_point;
    return *this;
  }

  inline uint8_t y_zero_point() const {
    return this->y_zero_point_;
  }

  inline VAddCMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VAddCMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VAddCMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }
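  // Tests a QU8 (unsigned 8-bit quantized) VADDC microkernel: adds the
  // constant scalar b to every element of the input, requantizes, and checks
  // the output against both a floating-point reference and the scalar
  // requantization helper.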
  void Test(xnn_qu8_vadd_minmax_ukernel_function vaddc_minmax, xnn_init_qu8_add_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
    std::vector<float> y_fp(batch_size());
    std::vector<uint8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(u8rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(u8rng));
      } else {
        std::fill(y.begin(), y.end(), 0xA5);
      }
      const uint8_t* a_data = inplace() ? y.data() : a.data();
      const uint8_t b = u8rng();

      // Prepare parameters.
      xnn_qu8_add_minmax_params quantization_params;
      init_params(
        &quantization_params,
        a_zero_point(), b_zero_point(), y_zero_point(),
        a_scale() / y_scale(), b_scale() / y_scale(),
        qmin(), qmax());
      xnn_qu8_add_minmax_params scalar_quantization_params;
      xnn_init_qu8_add_minmax_scalar_params(
        &scalar_quantization_params,
        a_zero_point(), b_zero_point(), y_zero_point(),
        a_scale() / y_scale(), b_scale() / y_scale(),
        qmin(), qmax());

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_fp[i] = float(y_zero_point()) +
          float(int32_t(a_data[i]) - int32_t(a_zero_point())) * (a_scale() / y_scale()) +
          float(int32_t(b) - int32_t(b_zero_point())) * (b_scale() / y_scale());
        y_fp[i] = std::min<float>(y_fp[i], float(qmax()));
        y_fp[i] = std::max<float>(y_fp[i], float(qmin()));
        y_ref[i] = xnn_qu8_quantize_add(a_data[i], b, scalar_quantization_params);
      }

      // Call optimized micro-kernel.
      vaddc_minmax(batch_size(), a_data, &b, y.data(), &quantization_params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(uint32_t(y[i]), uint32_t(qmax()))
          << "at element " << i << " / " << batch_size();
        ASSERT_GE(uint32_t(y[i]), uint32_t(qmin()))
          << "at element " << i << " / " << batch_size();
        ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
          << "at element " << i << " / " << batch_size();
        ASSERT_EQ(uint32_t(y_ref[i]), uint32_t(y[i]))
          << "at element " << i << " / " << batch_size();
      }
    }
  }
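  // Tests a QS8 (signed 8-bit quantized) VADDC microkernel. The tester's
  // uint8_t zero-point and qmin/qmax knobs are shifted by 0x80 into the
  // signed range before being handed to the kernel parameters.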
  void Test(xnn_qs8_vadd_minmax_ukernel_function vaddc_minmax, xnn_init_qs8_add_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);

    std::vector<int8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
    std::vector<float> y_fp(batch_size());
    std::vector<int8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(i8rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(i8rng));
      } else {
        std::fill(y.begin(), y.end(), 0xA5);
      }
      const int8_t* a_data = inplace() ? y.data() : a.data();
      const int8_t b = i8rng();

      // Prepare parameters.
      xnn_qs8_add_minmax_params quantization_params;
      init_params(
        &quantization_params,
        int8_t(a_zero_point() - 0x80), int8_t(b_zero_point() - 0x80), int8_t(y_zero_point() - 0x80),
        a_scale() / y_scale(), b_scale() / y_scale(),
        int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
      xnn_qs8_add_minmax_params scalar_quantization_params;
      xnn_init_qs8_add_minmax_scalar_params(
        &scalar_quantization_params,
        int8_t(a_zero_point() - 0x80), int8_t(b_zero_point() - 0x80), int8_t(y_zero_point() - 0x80),
        a_scale() / y_scale(), b_scale() / y_scale(),
        int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_fp[i] = float(int32_t(y_zero_point() - 0x80)) +
          float(int32_t(a_data[i]) - int32_t(a_zero_point() - 0x80)) * (a_scale() / y_scale()) +
          float(int32_t(b) - int32_t(b_zero_point() - 0x80)) * (b_scale() / y_scale());
        y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax() - 0x80)));
        y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin() - 0x80)));
        y_ref[i] = xnn_qs8_quantize_add(a_data[i], b, scalar_quantization_params);
      }

      // Call optimized micro-kernel.
      vaddc_minmax(batch_size(), a_data, &b, y.data(), &quantization_params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(int32_t(y[i]), int32_t(qmax() - 0x80))
          << "at element " << i << " / " << batch_size();
        ASSERT_GE(int32_t(y[i]), int32_t(qmin() - 0x80))
          << "at element " << i << " / " << batch_size();
        ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
          << "at element " << i << " / " << batch_size();
        ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
          << "at element " << i << " / " << batch_size();
      }
    }
  }

 private:
  size_t batch_size_{1};
  bool inplace_{false};
  float a_scale_{0.75f};
  float b_scale_{1.25f};
  float y_scale_{0.96875f};
  uint8_t a_zero_point_{121};
  uint8_t b_zero_point_{127};
  uint8_t y_zero_point_{133};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};
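// A minimal usage sketch. The TEST name and the scalar QU8 VADDC kernel below
// are illustrative assumptions, not names confirmed by this header; only
// xnn_init_qu8_add_minmax_scalar_params is used above. Substitute the
// generated kernel actually under test:
//
//   TEST(QU8_VADDC_MINMAX__SCALAR_X1, batch_eq_1) {
//     VAddCMicrokernelTester()
//       .batch_size(1)
//       .Test(xnn_qu8_vaddc_minmax_ukernel__scalar_x1,  // hypothetical kernel name
//             xnn_init_qu8_add_minmax_scalar_params);
//   }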