1 // Copyright 2021 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #include <algorithm> 7 #include <cmath> 8 #include <cstddef> 9 #include <cstdint> 10 #include <cstdlib> 11 #include <iomanip> 12 #include <ios> 13 #include <vector> 14 15 #include <gtest/gtest.h> 16 17 #include <fp16.h> 18 19 #include <xnnpack/aligned-allocator.h> 20 #include <xnnpack/common.h> 21 #include <xnnpack/isa-checks.h> 22 #include <xnnpack/math.h> 23 #include <xnnpack/math-stubs.h> 24 25 26 constexpr int kBlockSize = 1024; 27 28 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(CVT__NEON,positive_normal)29 TEST(CVT__NEON, positive_normal) { 30 TEST_REQUIRES_ARM_NEON; 31 32 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 33 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 34 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 35 zero_point <= std::numeric_limits<uint8_t>::max(); 36 zero_point++) 37 { 38 const uint32_t max_input = float_as_uint32((float) (std::numeric_limits<uint8_t>::max() - zero_point)); 39 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 40 for (uint32_t i = 0; i < kBlockSize; i++) { 41 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, max_input)); 42 } 43 xnn_math_f32_qu8_cvt__neon(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 44 for (uint32_t i = 0; i < kBlockSize; i++) { 45 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 46 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 47 reference_output = std::numeric_limits<uint8_t>::max(); 48 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 49 reference_output = std::numeric_limits<uint8_t>::min(); 50 } 51 ASSERT_EQ(reference_output, long(outputs[i])) 52 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 53 << ", reference = " << std::dec << reference_output 54 << ", optimized = " << std::dec << uint32_t(outputs[i]) 55 << ", zero point = " << std::dec << zero_point; 56 } 57 } 58 } 59 } 60 TEST(CVT__NEON,negative_normal)61 TEST(CVT__NEON, negative_normal) { 62 TEST_REQUIRES_ARM_NEON; 63 64 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 65 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 66 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 67 zero_point <= std::numeric_limits<uint8_t>::max(); 68 zero_point++) 69 { 70 const uint32_t max_input = float_as_uint32((float) zero_point); 71 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 72 for (uint32_t i = 0; i < kBlockSize; i++) { 73 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 74 } 75 xnn_math_f32_qu8_cvt__neon(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 76 for (uint32_t i = 0; i < kBlockSize; i++) { 77 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 78 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 79 reference_output = std::numeric_limits<uint8_t>::max(); 80 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 81 reference_output = std::numeric_limits<uint8_t>::min(); 82 } 83 ASSERT_EQ(reference_output, long(outputs[i])) 84 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 85 << ", reference = " << std::dec << reference_output 86 << ", optimized = " << std::dec << uint32_t(outputs[i]) 87 << ", zero point = " << std::dec << zero_point; 88 } 89 } 90 } 91 } 92 TEST(CVT__NEON,positive_saturation)93 TEST(CVT__NEON, positive_saturation) { 94 TEST_REQUIRES_ARM_NEON; 95 96 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 97 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 98 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 99 zero_point <= std::numeric_limits<uint8_t>::max(); 100 zero_point++) 101 { 102 const uint32_t min_input = float_as_uint32((float) (std::numeric_limits<uint8_t>::max() - zero_point)); 103 const uint32_t max_input = UINT32_C(0x7F800000); 104 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 105 for (uint32_t i = 0; i < kBlockSize; i++) { 106 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, max_input)); 107 } 108 xnn_math_f32_qu8_cvt__neon(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 109 for (uint32_t i = 0; i < kBlockSize; i++) { 110 const int32_t reference_output = std::numeric_limits<uint8_t>::max(); 111 ASSERT_EQ(reference_output, uint32_t(outputs[i])) 112 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 113 << ", reference = " << std::dec << reference_output 114 << ", optimized = " << std::dec << uint32_t(outputs[i]) 115 << ", zero point = " << std::dec << zero_point; 116 } 117 } 118 } 119 } 120 TEST(CVT__NEON,negative_saturation)121 TEST(CVT__NEON, negative_saturation) { 122 TEST_REQUIRES_ARM_NEON; 123 124 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 125 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 126 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 127 zero_point <= std::numeric_limits<uint8_t>::max(); 128 zero_point++) 129 { 130 const uint32_t min_input = float_as_uint32((float) zero_point); 131 const uint32_t max_input = UINT32_C(0x7F800000); 132 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 133 for (uint32_t i = 0; i < kBlockSize; i++) { 134 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 135 } 136 xnn_math_f32_qu8_cvt__neon(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 137 for (uint32_t i = 0; i < kBlockSize; i++) { 138 const int32_t reference_output = std::numeric_limits<uint8_t>::min(); 139 ASSERT_EQ(reference_output, uint32_t(outputs[i])) 140 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 141 << ", reference = " << std::dec << reference_output 142 << ", optimized = " << std::dec << uint32_t(outputs[i]) 143 << ", zero point = " << std::dec << zero_point; 144 } 145 } 146 } 147 } 148 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 149 150 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(CVT__NEONV8,positive_normal)151 TEST(CVT__NEONV8, positive_normal) { 152 TEST_REQUIRES_ARM_NEON_V8; 153 154 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 155 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 156 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 157 zero_point <= std::numeric_limits<uint8_t>::max(); 158 zero_point++) 159 { 160 const uint32_t max_input = float_as_uint32((float) (std::numeric_limits<uint8_t>::max() - zero_point)); 161 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 162 for (uint32_t i = 0; i < kBlockSize; i++) { 163 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, max_input)); 164 } 165 xnn_math_f32_qu8_cvt__neonv8(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 166 for (uint32_t i = 0; i < kBlockSize; i++) { 167 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 168 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 169 reference_output = std::numeric_limits<uint8_t>::max(); 170 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 171 reference_output = std::numeric_limits<uint8_t>::min(); 172 } 173 ASSERT_EQ(reference_output, long(outputs[i])) 174 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 175 << ", reference = " << std::dec << reference_output 176 << ", optimized = " << std::dec << uint32_t(outputs[i]) 177 << ", zero point = " << std::dec << zero_point; 178 } 179 } 180 } 181 } 182 TEST(CVT__NEONV8,negative_normal)183 TEST(CVT__NEONV8, negative_normal) { 184 TEST_REQUIRES_ARM_NEON_V8; 185 186 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 187 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 188 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 189 zero_point <= std::numeric_limits<uint8_t>::max(); 190 zero_point++) 191 { 192 const uint32_t max_input = float_as_uint32((float) zero_point); 193 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 194 for (uint32_t i = 0; i < kBlockSize; i++) { 195 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 196 } 197 xnn_math_f32_qu8_cvt__neonv8(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 198 for (uint32_t i = 0; i < kBlockSize; i++) { 199 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 200 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 201 reference_output = std::numeric_limits<uint8_t>::max(); 202 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 203 reference_output = std::numeric_limits<uint8_t>::min(); 204 } 205 ASSERT_EQ(reference_output, long(outputs[i])) 206 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 207 << ", reference = " << std::dec << reference_output 208 << ", optimized = " << std::dec << uint32_t(outputs[i]) 209 << ", zero point = " << std::dec << zero_point; 210 } 211 } 212 } 213 } 214 TEST(CVT__NEONV8,positive_saturation)215 TEST(CVT__NEONV8, positive_saturation) { 216 TEST_REQUIRES_ARM_NEON_V8; 217 218 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 219 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 220 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 221 zero_point <= std::numeric_limits<uint8_t>::max(); 222 zero_point++) 223 { 224 const uint32_t min_input = float_as_uint32((float) (std::numeric_limits<uint8_t>::max() - zero_point)); 225 const uint32_t max_input = UINT32_C(0x7F800000); 226 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 227 for (uint32_t i = 0; i < kBlockSize; i++) { 228 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, max_input)); 229 } 230 xnn_math_f32_qu8_cvt__neonv8(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 231 for (uint32_t i = 0; i < kBlockSize; i++) { 232 const int32_t reference_output = std::numeric_limits<uint8_t>::max(); 233 ASSERT_EQ(reference_output, uint32_t(outputs[i])) 234 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 235 << ", reference = " << std::dec << reference_output 236 << ", optimized = " << std::dec << uint32_t(outputs[i]) 237 << ", zero point = " << std::dec << zero_point; 238 } 239 } 240 } 241 } 242 TEST(CVT__NEONV8,negative_saturation)243 TEST(CVT__NEONV8, negative_saturation) { 244 TEST_REQUIRES_ARM_NEON_V8; 245 246 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 247 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 248 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 249 zero_point <= std::numeric_limits<uint8_t>::max(); 250 zero_point++) 251 { 252 const uint32_t min_input = float_as_uint32((float) zero_point); 253 const uint32_t max_input = UINT32_C(0x7F800000); 254 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 255 for (uint32_t i = 0; i < kBlockSize; i++) { 256 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 257 } 258 xnn_math_f32_qu8_cvt__neonv8(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 259 for (uint32_t i = 0; i < kBlockSize; i++) { 260 const int32_t reference_output = std::numeric_limits<uint8_t>::min(); 261 ASSERT_EQ(reference_output, uint32_t(outputs[i])) 262 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 263 << ", reference = " << std::dec << reference_output 264 << ", optimized = " << std::dec << uint32_t(outputs[i]) 265 << ", zero point = " << std::dec << zero_point; 266 } 267 } 268 } 269 } 270 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 271 272 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD TEST(CVT__WASMSIMD,positive_normal)273 TEST(CVT__WASMSIMD, positive_normal) { 274 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 275 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 276 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 277 zero_point <= std::numeric_limits<uint8_t>::max(); 278 zero_point++) 279 { 280 const uint32_t max_input = float_as_uint32((float) (std::numeric_limits<uint8_t>::max() - zero_point)); 281 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 282 for (uint32_t i = 0; i < kBlockSize; i++) { 283 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, max_input)); 284 } 285 xnn_math_f32_qu8_cvt__wasmsimd(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 286 for (uint32_t i = 0; i < kBlockSize; i++) { 287 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 288 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 289 reference_output = std::numeric_limits<uint8_t>::max(); 290 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 291 reference_output = std::numeric_limits<uint8_t>::min(); 292 } 293 ASSERT_EQ(reference_output, long(outputs[i])) 294 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 295 << ", reference = " << std::dec << reference_output 296 << ", optimized = " << std::dec << uint32_t(outputs[i]) 297 << ", zero point = " << std::dec << zero_point; 298 } 299 } 300 } 301 } 302 TEST(CVT__WASMSIMD,negative_normal)303 TEST(CVT__WASMSIMD, negative_normal) { 304 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 305 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 306 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 307 zero_point <= std::numeric_limits<uint8_t>::max(); 308 zero_point++) 309 { 310 const uint32_t max_input = float_as_uint32((float) zero_point); 311 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 312 for (uint32_t i = 0; i < kBlockSize; i++) { 313 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 314 } 315 xnn_math_f32_qu8_cvt__wasmsimd(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 316 for (uint32_t i = 0; i < kBlockSize; i++) { 317 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 318 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 319 reference_output = std::numeric_limits<uint8_t>::max(); 320 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 321 reference_output = std::numeric_limits<uint8_t>::min(); 322 } 323 ASSERT_EQ(reference_output, long(outputs[i])) 324 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 325 << ", reference = " << std::dec << reference_output 326 << ", optimized = " << std::dec << uint32_t(outputs[i]) 327 << ", zero point = " << std::dec << zero_point; 328 } 329 } 330 } 331 } 332 TEST(CVT__WASMSIMD,positive_saturation)333 TEST(CVT__WASMSIMD, positive_saturation) { 334 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 335 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 336 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 337 zero_point <= std::numeric_limits<uint8_t>::max(); 338 zero_point++) 339 { 340 const uint32_t min_input = float_as_uint32((float) (std::numeric_limits<uint8_t>::max() - zero_point)); 341 const uint32_t max_input = UINT32_C(0x7F800000); 342 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 343 for (uint32_t i = 0; i < kBlockSize; i++) { 344 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, max_input)); 345 } 346 xnn_math_f32_qu8_cvt__wasmsimd(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 347 for (uint32_t i = 0; i < kBlockSize; i++) { 348 const int32_t reference_output = std::numeric_limits<uint8_t>::max(); 349 ASSERT_EQ(reference_output, uint32_t(outputs[i])) 350 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 351 << ", reference = " << std::dec << reference_output 352 << ", optimized = " << std::dec << uint32_t(outputs[i]) 353 << ", zero point = " << std::dec << zero_point; 354 } 355 } 356 } 357 } 358 TEST(CVT__WASMSIMD,negative_saturation)359 TEST(CVT__WASMSIMD, negative_saturation) { 360 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 361 std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> outputs(kBlockSize); 362 for (int32_t zero_point = std::numeric_limits<uint8_t>::min(); 363 zero_point <= std::numeric_limits<uint8_t>::max(); 364 zero_point++) 365 { 366 const uint32_t min_input = float_as_uint32((float) zero_point); 367 const uint32_t max_input = UINT32_C(0x7F800000); 368 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 369 for (uint32_t i = 0; i < kBlockSize; i++) { 370 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 371 } 372 xnn_math_f32_qu8_cvt__wasmsimd(kBlockSize * sizeof(uint8_t), inputs.data(), outputs.data(), uint8_t(zero_point)); 373 for (uint32_t i = 0; i < kBlockSize; i++) { 374 const int32_t reference_output = std::numeric_limits<uint8_t>::min(); 375 ASSERT_EQ(reference_output, uint32_t(outputs[i])) 376 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 377 << ", reference = " << std::dec << reference_output 378 << ", optimized = " << std::dec << uint32_t(outputs[i]) 379 << ", zero point = " << std::dec << zero_point; 380 } 381 } 382 } 383 } 384 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD 385