// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/requantization.h>


class VMulMicrokernelTester {
 public:
  inline VMulMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VMulMicrokernelTester& inplace_a(bool inplace_a) {
    this->inplace_a_ = inplace_a;
    return *this;
  }

  inline bool inplace_a() const {
    return this->inplace_a_;
  }

  inline VMulMicrokernelTester& inplace_b(bool inplace_b) {
    this->inplace_b_ = inplace_b;
    return *this;
  }

  inline bool inplace_b() const {
    return this->inplace_b_;
  }

  inline VMulMicrokernelTester& a_scale(float a_scale) {
    assert(a_scale > 0.0f);
    assert(std::isnormal(a_scale));
    this->a_scale_ = a_scale;
    return *this;
  }

  inline float a_scale() const {
    return this->a_scale_;
  }

  inline VMulMicrokernelTester& a_zero_point(uint8_t a_zero_point) {
    this->a_zero_point_ = a_zero_point;
    return *this;
  }

  inline uint8_t a_zero_point() const {
    return this->a_zero_point_;
  }

  inline VMulMicrokernelTester& b_scale(float b_scale) {
    assert(b_scale > 0.0f);
    assert(std::isnormal(b_scale));
    this->b_scale_ = b_scale;
    return *this;
  }

  inline float b_scale() const {
    return this->b_scale_;
  }

  inline VMulMicrokernelTester& b_zero_point(uint8_t b_zero_point) {
    this->b_zero_point_ = b_zero_point;
    return *this;
  }

  inline uint8_t b_zero_point() const {
    return this->b_zero_point_;
  }

  inline VMulMicrokernelTester& y_scale(float y_scale) {
    assert(y_scale > 0.0f);
    assert(std::isnormal(y_scale));
    this->y_scale_ = y_scale;
    return *this;
  }

  inline float y_scale() const {
    return this->y_scale_;
  }

  inline VMulMicrokernelTester& y_zero_point(uint8_t y_zero_point) {
    this->y_zero_point_ = y_zero_point;
    return *this;
  }

  inline uint8_t y_zero_point() const {
    return this->y_zero_point_;
  }

  inline VMulMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VMulMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VMulMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void Test(
      xnn_qu8_vmul_minmax_ukernel_function vmul_minmax,
      xnn_init_qu8_mul_minmax_params_fn init_params,
      xnn_qu8_requantize_fn requantize) const
  {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
    std::vector<float> y_fp(batch_size());
    std::vector<uint8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(u8rng));
      std::generate(b.begin(), b.end(), std::ref(u8rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(u8rng));
      } else {
        std::fill(y.begin(), y.end(), 0xA5);
      }
      const uint8_t* a_data = inplace_a() ? y.data() : a.data();
      const uint8_t* b_data = inplace_b() ? y.data() : b.data();

      // Prepare parameters.
      const float product_scale = a_scale() * b_scale();
      const float product_output_scale = product_scale / y_scale();
      xnn_qu8_mul_minmax_params quantization_params;
      init_params(
        &quantization_params,
        a_zero_point(), b_zero_point(), y_zero_point(),
        product_output_scale, qmin(), qmax());

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const int32_t acc =
          (int32_t(a_data[i]) - int32_t(a_zero_point())) * (int32_t(b_data[i]) - int32_t(b_zero_point()));
        y_fp[i] = float(y_zero_point()) + product_output_scale * float(acc);
        y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax())));
        y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin())));
        y_ref[i] = requantize(
          acc, product_output_scale, y_zero_point(), qmin(), qmax());
      }

      // Call optimized micro-kernel.
      vmul_minmax(batch_size(), a_data, b_data, y.data(), &quantization_params);

      // Verify results.
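      // Each output element must stay within [qmin(), qmax()], match the requantized
      // reference exactly, and lie within 0.6 of the floating-point reference (slack
      // for rounding in the requantization).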
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(uint32_t(y[i]), uint32_t(qmax()))
          << "at element " << i << " / " << batch_size();
        ASSERT_GE(uint32_t(y[i]), uint32_t(qmin()))
          << "at element " << i << " / " << batch_size();
        ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
          << "at element " << i << " / " << batch_size();
        ASSERT_EQ(uint32_t(y[i]), uint32_t(y_ref[i]))
          << "at element " << i << " / " << batch_size();
      }
    }
  }

  void Test(
      xnn_qs8_vmul_minmax_ukernel_function vmul_minmax,
      xnn_init_qs8_mul_minmax_params_fn init_params,
      xnn_qs8_requantize_fn requantize) const
  {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      rng);

    std::vector<int8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
    std::vector<float> y_fp(batch_size());
    std::vector<int8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(i8rng));
      std::generate(b.begin(), b.end(), std::ref(i8rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(i8rng));
      } else {
        std::fill(y.begin(), y.end(), 0xA5);
      }
      const int8_t* a_data = inplace_a() ? y.data() : a.data();
      const int8_t* b_data = inplace_b() ? y.data() : b.data();

      // Prepare parameters.
      const float product_scale = a_scale() * b_scale();
      const float product_output_scale = product_scale / y_scale();
      EXPECT_GE(product_output_scale, 0x1.0p-32f);
      xnn_qs8_mul_minmax_params quantization_params;
      init_params(
        &quantization_params,
        int8_t(a_zero_point() - 0x80), int8_t(b_zero_point() - 0x80), int8_t(y_zero_point() - 0x80),
        product_output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const int32_t acc =
          (int32_t(a_data[i]) - int32_t(a_zero_point() - 0x80)) * (int32_t(b_data[i]) - int32_t(b_zero_point() - 0x80));
        y_fp[i] = float(y_zero_point() - 0x80) + product_output_scale * float(acc);
        y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax() - 0x80)));
        y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin() - 0x80)));
        y_ref[i] = requantize(
          acc, product_output_scale, int8_t(y_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
      }

      // Call optimized micro-kernel.
      vmul_minmax(batch_size(), a_data, b_data, y.data(), &quantization_params);

      // Verify results.
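      // Same checks as the unsigned path, but in the signed domain: the tester keeps
      // zero points and qmin/qmax as uint8_t, so the expectations are shifted by 0x80
      // before comparing against the int8_t outputs.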
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(int32_t(y[i]), int32_t(qmax() - 0x80))
          << "at element " << i << " / " << batch_size();
        ASSERT_GE(int32_t(y[i]), int32_t(qmin() - 0x80))
          << "at element " << i << " / " << batch_size();
        ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
          << "at element " << i << " / " << batch_size();
        ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
          << "at element " << i << " / " << batch_size();
      }
    }
  }

 private:
  size_t batch_size_{1};
  bool inplace_a_{false};
  bool inplace_b_{false};
  float a_scale_{0.75f};
  float b_scale_{1.25f};
  float y_scale_{0.96875f};
  uint8_t a_zero_point_{121};
  uint8_t b_zero_point_{127};
  uint8_t y_zero_point_{133};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};
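// Usage sketch (illustrative only): a gtest case typically configures the tester via
// its fluent setters and then calls Test() with the micro-kernel under test, the
// matching parameter-initialization function, and the matching requantization
// reference. The kernel_under_test, init_params_under_test, and requantize_reference
// names below are placeholders, not symbols declared in this header:
//
//   TEST(QU8_VMUL_MINMAX, batch_div_4) {
//     for (size_t batch_size = 8; batch_size <= 32; batch_size += 4) {
//       VMulMicrokernelTester()
//         .batch_size(batch_size)
//         .qmin(1)
//         .qmax(254)
//         .Test(kernel_under_test, init_params_under_test, requantize_reference);
//     }
//   }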