1 // Copyright 2020 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #include <algorithm> 7 #include <cmath> 8 #include <cstddef> 9 #include <cstdint> 10 #include <cstdlib> 11 #include <iomanip> 12 #include <ios> 13 #include <vector> 14 15 #include <gtest/gtest.h> 16 17 #include <fp16.h> 18 19 #include <xnnpack/aligned-allocator.h> 20 #include <xnnpack/common.h> 21 #include <xnnpack/isa-checks.h> 22 #include <xnnpack/math.h> 23 #include <xnnpack/math-stubs.h> 24 25 26 constexpr int kBlockSize = 1024; 27 28 29 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(EXP__NEONFMA_RR2_LUT64_P2,negative_zero)30 TEST(EXP__NEONFMA_RR2_LUT64_P2, negative_zero) { 31 TEST_REQUIRES_ARM_NEON_FMA; 32 33 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 34 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 35 std::fill(inputs.begin(), inputs.end(), -0.0f); 36 xnn_math_f32_exp__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 37 const float reference_output = 1.0f; 38 ASSERT_EQ(reference_output, outputs[0]) 39 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 40 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 41 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 42 } 43 TEST(EXP__NEONFMA_RR2_LUT64_P2,positive_zero)44 TEST(EXP__NEONFMA_RR2_LUT64_P2, positive_zero) { 45 TEST_REQUIRES_ARM_NEON_FMA; 46 47 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 48 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 49 std::fill(inputs.begin(), inputs.end(), +0.0f); 50 xnn_math_f32_exp__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 51 const float reference_output = 1.0f; 52 ASSERT_EQ(reference_output, outputs[0]) 53 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 54 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 55 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 56 } 57 TEST(EXP__NEONFMA_RR2_LUT64_P2,negative_saturation)58 TEST(EXP__NEONFMA_RR2_LUT64_P2, negative_saturation) { 59 TEST_REQUIRES_ARM_NEON_FMA; 60 61 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 62 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 63 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 64 for (uint32_t i = 0; i < kBlockSize; i++) { 65 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 66 } 67 xnn_math_f32_exp__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 68 for (uint32_t i = 0; i < kBlockSize; i++) { 69 const uint32_t reference_output = UINT32_C(0x00000000); 70 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 71 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 72 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 73 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 74 } 75 } 76 } 77 TEST(EXP__NEONFMA_RR2_LUT64_P2,positive_overflow)78 TEST(EXP__NEONFMA_RR2_LUT64_P2, positive_overflow) { 79 TEST_REQUIRES_ARM_NEON_FMA; 80 81 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 82 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 83 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 84 for (uint32_t i = 0; i < kBlockSize; i++) { 85 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 86 } 87 xnn_math_f32_exp__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 88 for (uint32_t i = 0; i < kBlockSize; i++) { 89 const uint32_t reference_output = UINT32_C(0x7F800000); 90 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 91 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 92 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 93 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 94 } 95 } 96 } 97 TEST(EXP__NEONFMA_RR2_LUT64_P2,positive_nan)98 TEST(EXP__NEONFMA_RR2_LUT64_P2, positive_nan) { 99 TEST_REQUIRES_ARM_NEON_FMA; 100 101 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 102 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 103 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 104 for (uint32_t i = 0; i < kBlockSize; i++) { 105 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 106 } 107 xnn_math_f32_exp__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 108 for (uint32_t i = 0; i < kBlockSize; i++) { 109 ASSERT_TRUE(std::isnan(outputs[i])) 110 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 111 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 112 } 113 } 114 } 115 TEST(EXP__NEONFMA_RR2_LUT64_P2,negative_nan)116 TEST(EXP__NEONFMA_RR2_LUT64_P2, negative_nan) { 117 TEST_REQUIRES_ARM_NEON_FMA; 118 119 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 120 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 121 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 122 for (uint32_t i = 0; i < kBlockSize; i++) { 123 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 124 } 125 xnn_math_f32_exp__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 126 for (uint32_t i = 0; i < kBlockSize; i++) { 127 ASSERT_TRUE(std::isnan(outputs[i])) 128 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 129 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 130 } 131 } 132 } 133 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 134 135 136 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(EXP__NEONFMA_RR2_P5,negative_zero)137 TEST(EXP__NEONFMA_RR2_P5, negative_zero) { 138 TEST_REQUIRES_ARM_NEON_FMA; 139 140 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 141 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 142 std::fill(inputs.begin(), inputs.end(), -0.0f); 143 xnn_math_f32_exp__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 144 const float reference_output = 1.0f; 145 ASSERT_EQ(reference_output, outputs[0]) 146 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 147 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 148 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 149 } 150 TEST(EXP__NEONFMA_RR2_P5,positive_zero)151 TEST(EXP__NEONFMA_RR2_P5, positive_zero) { 152 TEST_REQUIRES_ARM_NEON_FMA; 153 154 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 155 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 156 std::fill(inputs.begin(), inputs.end(), +0.0f); 157 xnn_math_f32_exp__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 158 const float reference_output = 1.0f; 159 ASSERT_EQ(reference_output, outputs[0]) 160 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 161 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 162 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 163 } 164 TEST(EXP__NEONFMA_RR2_P5,negative_saturation)165 TEST(EXP__NEONFMA_RR2_P5, negative_saturation) { 166 TEST_REQUIRES_ARM_NEON_FMA; 167 168 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 169 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 170 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 171 for (uint32_t i = 0; i < kBlockSize; i++) { 172 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 173 } 174 xnn_math_f32_exp__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 175 for (uint32_t i = 0; i < kBlockSize; i++) { 176 const uint32_t reference_output = UINT32_C(0x00000000); 177 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 178 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 179 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 180 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 181 } 182 } 183 } 184 TEST(EXP__NEONFMA_RR2_P5,positive_overflow)185 TEST(EXP__NEONFMA_RR2_P5, positive_overflow) { 186 TEST_REQUIRES_ARM_NEON_FMA; 187 188 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 189 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 190 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 191 for (uint32_t i = 0; i < kBlockSize; i++) { 192 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 193 } 194 xnn_math_f32_exp__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 195 for (uint32_t i = 0; i < kBlockSize; i++) { 196 const uint32_t reference_output = UINT32_C(0x7F800000); 197 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 198 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 199 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 200 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 201 } 202 } 203 } 204 TEST(EXP__NEONFMA_RR2_P5,positive_nan)205 TEST(EXP__NEONFMA_RR2_P5, positive_nan) { 206 TEST_REQUIRES_ARM_NEON_FMA; 207 208 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 209 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 210 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 211 for (uint32_t i = 0; i < kBlockSize; i++) { 212 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 213 } 214 xnn_math_f32_exp__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 215 for (uint32_t i = 0; i < kBlockSize; i++) { 216 ASSERT_TRUE(std::isnan(outputs[i])) 217 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 218 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 219 } 220 } 221 } 222 TEST(EXP__NEONFMA_RR2_P5,negative_nan)223 TEST(EXP__NEONFMA_RR2_P5, negative_nan) { 224 TEST_REQUIRES_ARM_NEON_FMA; 225 226 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 227 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 228 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 229 for (uint32_t i = 0; i < kBlockSize; i++) { 230 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 231 } 232 xnn_math_f32_exp__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 233 for (uint32_t i = 0; i < kBlockSize; i++) { 234 ASSERT_TRUE(std::isnan(outputs[i])) 235 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 236 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 237 } 238 } 239 } 240 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 241 242 243 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM,negative_zero)244 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, negative_zero) { 245 TEST_REQUIRES_X86_AVX512F; 246 247 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 248 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 249 std::fill(inputs.begin(), inputs.end(), -0.0f); 250 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 251 const float reference_output = 1.0f; 252 ASSERT_EQ(reference_output, outputs[0]) 253 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 254 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 255 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 256 } 257 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM,positive_zero)258 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, positive_zero) { 259 TEST_REQUIRES_X86_AVX512F; 260 261 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 262 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 263 std::fill(inputs.begin(), inputs.end(), +0.0f); 264 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 265 const float reference_output = 1.0f; 266 ASSERT_EQ(reference_output, outputs[0]) 267 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 268 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 269 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 270 } 271 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM,negative_saturation)272 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, negative_saturation) { 273 TEST_REQUIRES_X86_AVX512F; 274 275 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 276 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 277 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 278 for (uint32_t i = 0; i < kBlockSize; i++) { 279 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 280 } 281 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 282 for (uint32_t i = 0; i < kBlockSize; i++) { 283 const uint32_t reference_output = UINT32_C(0x00000000); 284 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 285 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 286 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 287 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 288 } 289 } 290 } 291 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM,positive_overflow)292 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, positive_overflow) { 293 TEST_REQUIRES_X86_AVX512F; 294 295 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 296 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 297 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 298 for (uint32_t i = 0; i < kBlockSize; i++) { 299 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 300 } 301 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 302 for (uint32_t i = 0; i < kBlockSize; i++) { 303 const uint32_t reference_output = UINT32_C(0x7F800000); 304 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 305 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 306 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 307 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 308 } 309 } 310 } 311 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM,positive_nan)312 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, positive_nan) { 313 TEST_REQUIRES_X86_AVX512F; 314 315 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 316 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 317 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 318 for (uint32_t i = 0; i < kBlockSize; i++) { 319 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 320 } 321 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 322 for (uint32_t i = 0; i < kBlockSize; i++) { 323 ASSERT_TRUE(std::isnan(outputs[i])) 324 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 325 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 326 } 327 } 328 } 329 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM,negative_nan)330 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, negative_nan) { 331 TEST_REQUIRES_X86_AVX512F; 332 333 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 334 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 335 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 336 for (uint32_t i = 0; i < kBlockSize; i++) { 337 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 338 } 339 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 340 for (uint32_t i = 0; i < kBlockSize; i++) { 341 ASSERT_TRUE(std::isnan(outputs[i])) 342 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 343 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 344 } 345 } 346 } 347 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 348 349 350 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF,negative_zero)351 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, negative_zero) { 352 TEST_REQUIRES_X86_AVX512F; 353 354 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 355 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 356 std::fill(inputs.begin(), inputs.end(), -0.0f); 357 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 358 const float reference_output = 1.0f; 359 ASSERT_EQ(reference_output, outputs[0]) 360 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 361 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 362 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 363 } 364 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF,positive_zero)365 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, positive_zero) { 366 TEST_REQUIRES_X86_AVX512F; 367 368 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 369 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 370 std::fill(inputs.begin(), inputs.end(), +0.0f); 371 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 372 const float reference_output = 1.0f; 373 ASSERT_EQ(reference_output, outputs[0]) 374 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 375 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 376 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 377 } 378 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF,negative_saturation)379 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, negative_saturation) { 380 TEST_REQUIRES_X86_AVX512F; 381 382 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 383 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 384 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 385 for (uint32_t i = 0; i < kBlockSize; i++) { 386 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 387 } 388 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 389 for (uint32_t i = 0; i < kBlockSize; i++) { 390 const uint32_t reference_output = UINT32_C(0x00000000); 391 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 392 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 393 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 394 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 395 } 396 } 397 } 398 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF,positive_overflow)399 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, positive_overflow) { 400 TEST_REQUIRES_X86_AVX512F; 401 402 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 403 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 404 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 405 for (uint32_t i = 0; i < kBlockSize; i++) { 406 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 407 } 408 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 409 for (uint32_t i = 0; i < kBlockSize; i++) { 410 const uint32_t reference_output = UINT32_C(0x7F800000); 411 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 412 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 413 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 414 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 415 } 416 } 417 } 418 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF,positive_nan)419 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, positive_nan) { 420 TEST_REQUIRES_X86_AVX512F; 421 422 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 423 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 424 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 425 for (uint32_t i = 0; i < kBlockSize; i++) { 426 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 427 } 428 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 429 for (uint32_t i = 0; i < kBlockSize; i++) { 430 ASSERT_TRUE(std::isnan(outputs[i])) 431 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 432 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 433 } 434 } 435 } 436 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF,negative_nan)437 TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, negative_nan) { 438 TEST_REQUIRES_X86_AVX512F; 439 440 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 441 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 442 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 443 for (uint32_t i = 0; i < kBlockSize; i++) { 444 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 445 } 446 xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 447 for (uint32_t i = 0; i < kBlockSize; i++) { 448 ASSERT_TRUE(std::isnan(outputs[i])) 449 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 450 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 451 } 452 } 453 } 454 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 455 456 457 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2,negative_zero)458 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, negative_zero) { 459 TEST_REQUIRES_X86_AVX512F; 460 461 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 462 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 463 std::fill(inputs.begin(), inputs.end(), -0.0f); 464 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 465 const float reference_output = 1.0f; 466 ASSERT_EQ(reference_output, outputs[0]) 467 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 468 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 469 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 470 } 471 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2,positive_zero)472 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, positive_zero) { 473 TEST_REQUIRES_X86_AVX512F; 474 475 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 476 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 477 std::fill(inputs.begin(), inputs.end(), +0.0f); 478 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 479 const float reference_output = 1.0f; 480 ASSERT_EQ(reference_output, outputs[0]) 481 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 482 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 483 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 484 } 485 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2,negative_saturation)486 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, negative_saturation) { 487 TEST_REQUIRES_X86_AVX512F; 488 489 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 490 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 491 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 492 for (uint32_t i = 0; i < kBlockSize; i++) { 493 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 494 } 495 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 496 for (uint32_t i = 0; i < kBlockSize; i++) { 497 const uint32_t reference_output = UINT32_C(0x00000000); 498 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 499 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 500 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 501 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 502 } 503 } 504 } 505 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2,positive_overflow)506 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, positive_overflow) { 507 TEST_REQUIRES_X86_AVX512F; 508 509 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 510 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 511 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 512 for (uint32_t i = 0; i < kBlockSize; i++) { 513 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 514 } 515 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 516 for (uint32_t i = 0; i < kBlockSize; i++) { 517 const uint32_t reference_output = UINT32_C(0x7F800000); 518 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 519 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 520 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 521 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 522 } 523 } 524 } 525 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2,positive_nan)526 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, positive_nan) { 527 TEST_REQUIRES_X86_AVX512F; 528 529 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 530 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 531 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 532 for (uint32_t i = 0; i < kBlockSize; i++) { 533 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 534 } 535 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 536 for (uint32_t i = 0; i < kBlockSize; i++) { 537 ASSERT_TRUE(std::isnan(outputs[i])) 538 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 539 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 540 } 541 } 542 } 543 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2,negative_nan)544 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, negative_nan) { 545 TEST_REQUIRES_X86_AVX512F; 546 547 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 548 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 549 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 550 for (uint32_t i = 0; i < kBlockSize; i++) { 551 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 552 } 553 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 554 for (uint32_t i = 0; i < kBlockSize; i++) { 555 ASSERT_TRUE(std::isnan(outputs[i])) 556 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 557 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 558 } 559 } 560 } 561 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 562 563 564 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF,negative_zero)565 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, negative_zero) { 566 TEST_REQUIRES_X86_AVX512F; 567 568 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 569 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 570 std::fill(inputs.begin(), inputs.end(), -0.0f); 571 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 572 const float reference_output = 1.0f; 573 ASSERT_EQ(reference_output, outputs[0]) 574 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 575 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 576 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 577 } 578 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF,positive_zero)579 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, positive_zero) { 580 TEST_REQUIRES_X86_AVX512F; 581 582 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 583 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 584 std::fill(inputs.begin(), inputs.end(), +0.0f); 585 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 586 const float reference_output = 1.0f; 587 ASSERT_EQ(reference_output, outputs[0]) 588 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 589 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 590 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 591 } 592 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF,negative_saturation)593 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, negative_saturation) { 594 TEST_REQUIRES_X86_AVX512F; 595 596 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 597 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 598 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 599 for (uint32_t i = 0; i < kBlockSize; i++) { 600 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 601 } 602 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 603 for (uint32_t i = 0; i < kBlockSize; i++) { 604 const uint32_t reference_output = UINT32_C(0x00000000); 605 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 606 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 607 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 608 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 609 } 610 } 611 } 612 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF,positive_overflow)613 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, positive_overflow) { 614 TEST_REQUIRES_X86_AVX512F; 615 616 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 617 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 618 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 619 for (uint32_t i = 0; i < kBlockSize; i++) { 620 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 621 } 622 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 623 for (uint32_t i = 0; i < kBlockSize; i++) { 624 const uint32_t reference_output = UINT32_C(0x7F800000); 625 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 626 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 627 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 628 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 629 } 630 } 631 } 632 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF,positive_nan)633 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, positive_nan) { 634 TEST_REQUIRES_X86_AVX512F; 635 636 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 637 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 638 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 639 for (uint32_t i = 0; i < kBlockSize; i++) { 640 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 641 } 642 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 643 for (uint32_t i = 0; i < kBlockSize; i++) { 644 ASSERT_TRUE(std::isnan(outputs[i])) 645 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 646 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 647 } 648 } 649 } 650 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF,negative_nan)651 TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, negative_nan) { 652 TEST_REQUIRES_X86_AVX512F; 653 654 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 655 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 656 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 657 for (uint32_t i = 0; i < kBlockSize; i++) { 658 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 659 } 660 xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 661 for (uint32_t i = 0; i < kBlockSize; i++) { 662 ASSERT_TRUE(std::isnan(outputs[i])) 663 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 664 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 665 } 666 } 667 } 668 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 669 670 671 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX512F_RR2_P5,negative_zero)672 TEST(EXP__AVX512F_RR2_P5, negative_zero) { 673 TEST_REQUIRES_X86_AVX512F; 674 675 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 676 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 677 std::fill(inputs.begin(), inputs.end(), -0.0f); 678 xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 679 const float reference_output = 1.0f; 680 ASSERT_EQ(reference_output, outputs[0]) 681 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 682 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 683 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 684 } 685 TEST(EXP__AVX512F_RR2_P5,positive_zero)686 TEST(EXP__AVX512F_RR2_P5, positive_zero) { 687 TEST_REQUIRES_X86_AVX512F; 688 689 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 690 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 691 std::fill(inputs.begin(), inputs.end(), +0.0f); 692 xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 693 const float reference_output = 1.0f; 694 ASSERT_EQ(reference_output, outputs[0]) 695 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 696 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 697 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 698 } 699 TEST(EXP__AVX512F_RR2_P5,negative_saturation)700 TEST(EXP__AVX512F_RR2_P5, negative_saturation) { 701 TEST_REQUIRES_X86_AVX512F; 702 703 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 704 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 705 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 706 for (uint32_t i = 0; i < kBlockSize; i++) { 707 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 708 } 709 xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 710 for (uint32_t i = 0; i < kBlockSize; i++) { 711 const uint32_t reference_output = UINT32_C(0x00000000); 712 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 713 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 714 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 715 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 716 } 717 } 718 } 719 TEST(EXP__AVX512F_RR2_P5,positive_overflow)720 TEST(EXP__AVX512F_RR2_P5, positive_overflow) { 721 TEST_REQUIRES_X86_AVX512F; 722 723 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 724 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 725 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 726 for (uint32_t i = 0; i < kBlockSize; i++) { 727 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 728 } 729 xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 730 for (uint32_t i = 0; i < kBlockSize; i++) { 731 const uint32_t reference_output = UINT32_C(0x7F800000); 732 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 733 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 734 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 735 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 736 } 737 } 738 } 739 TEST(EXP__AVX512F_RR2_P5,positive_nan)740 TEST(EXP__AVX512F_RR2_P5, positive_nan) { 741 TEST_REQUIRES_X86_AVX512F; 742 743 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 744 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 745 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 746 for (uint32_t i = 0; i < kBlockSize; i++) { 747 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 748 } 749 xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 750 for (uint32_t i = 0; i < kBlockSize; i++) { 751 ASSERT_TRUE(std::isnan(outputs[i])) 752 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 753 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 754 } 755 } 756 } 757 TEST(EXP__AVX512F_RR2_P5,negative_nan)758 TEST(EXP__AVX512F_RR2_P5, negative_nan) { 759 TEST_REQUIRES_X86_AVX512F; 760 761 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 762 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 763 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 764 for (uint32_t i = 0; i < kBlockSize; i++) { 765 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 766 } 767 xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 768 for (uint32_t i = 0; i < kBlockSize; i++) { 769 ASSERT_TRUE(std::isnan(outputs[i])) 770 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 771 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 772 } 773 } 774 } 775 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 776 777 778 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX512F_RR2_P5_SCALEF,negative_zero)779 TEST(EXP__AVX512F_RR2_P5_SCALEF, negative_zero) { 780 TEST_REQUIRES_X86_AVX512F; 781 782 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 783 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 784 std::fill(inputs.begin(), inputs.end(), -0.0f); 785 xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 786 const float reference_output = 1.0f; 787 ASSERT_EQ(reference_output, outputs[0]) 788 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 789 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 790 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 791 } 792 TEST(EXP__AVX512F_RR2_P5_SCALEF,positive_zero)793 TEST(EXP__AVX512F_RR2_P5_SCALEF, positive_zero) { 794 TEST_REQUIRES_X86_AVX512F; 795 796 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 797 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 798 std::fill(inputs.begin(), inputs.end(), +0.0f); 799 xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 800 const float reference_output = 1.0f; 801 ASSERT_EQ(reference_output, outputs[0]) 802 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 803 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 804 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 805 } 806 TEST(EXP__AVX512F_RR2_P5_SCALEF,negative_saturation)807 TEST(EXP__AVX512F_RR2_P5_SCALEF, negative_saturation) { 808 TEST_REQUIRES_X86_AVX512F; 809 810 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 811 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 812 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 813 for (uint32_t i = 0; i < kBlockSize; i++) { 814 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 815 } 816 xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 817 for (uint32_t i = 0; i < kBlockSize; i++) { 818 const uint32_t reference_output = UINT32_C(0x00000000); 819 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 820 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 821 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 822 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 823 } 824 } 825 } 826 TEST(EXP__AVX512F_RR2_P5_SCALEF,positive_overflow)827 TEST(EXP__AVX512F_RR2_P5_SCALEF, positive_overflow) { 828 TEST_REQUIRES_X86_AVX512F; 829 830 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 831 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 832 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 833 for (uint32_t i = 0; i < kBlockSize; i++) { 834 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 835 } 836 xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 837 for (uint32_t i = 0; i < kBlockSize; i++) { 838 const uint32_t reference_output = UINT32_C(0x7F800000); 839 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 840 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 841 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 842 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 843 } 844 } 845 } 846 TEST(EXP__AVX512F_RR2_P5_SCALEF,positive_nan)847 TEST(EXP__AVX512F_RR2_P5_SCALEF, positive_nan) { 848 TEST_REQUIRES_X86_AVX512F; 849 850 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 851 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 852 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 853 for (uint32_t i = 0; i < kBlockSize; i++) { 854 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 855 } 856 xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 857 for (uint32_t i = 0; i < kBlockSize; i++) { 858 ASSERT_TRUE(std::isnan(outputs[i])) 859 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 860 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 861 } 862 } 863 } 864 TEST(EXP__AVX512F_RR2_P5_SCALEF,negative_nan)865 TEST(EXP__AVX512F_RR2_P5_SCALEF, negative_nan) { 866 TEST_REQUIRES_X86_AVX512F; 867 868 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 869 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 870 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 871 for (uint32_t i = 0; i < kBlockSize; i++) { 872 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 873 } 874 xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 875 for (uint32_t i = 0; i < kBlockSize; i++) { 876 ASSERT_TRUE(std::isnan(outputs[i])) 877 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 878 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 879 } 880 } 881 } 882 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 883 884 885 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX2_RR2_LUT8_P3_PERM,negative_zero)886 TEST(EXP__AVX2_RR2_LUT8_P3_PERM, negative_zero) { 887 TEST_REQUIRES_X86_AVX2; 888 889 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 890 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 891 std::fill(inputs.begin(), inputs.end(), -0.0f); 892 xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 893 const float reference_output = 1.0f; 894 ASSERT_EQ(reference_output, outputs[0]) 895 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 896 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 897 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 898 } 899 TEST(EXP__AVX2_RR2_LUT8_P3_PERM,positive_zero)900 TEST(EXP__AVX2_RR2_LUT8_P3_PERM, positive_zero) { 901 TEST_REQUIRES_X86_AVX2; 902 903 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 904 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 905 std::fill(inputs.begin(), inputs.end(), +0.0f); 906 xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 907 const float reference_output = 1.0f; 908 ASSERT_EQ(reference_output, outputs[0]) 909 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 910 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 911 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 912 } 913 TEST(EXP__AVX2_RR2_LUT8_P3_PERM,negative_saturation)914 TEST(EXP__AVX2_RR2_LUT8_P3_PERM, negative_saturation) { 915 TEST_REQUIRES_X86_AVX2; 916 917 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 918 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 919 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 920 for (uint32_t i = 0; i < kBlockSize; i++) { 921 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 922 } 923 xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 924 for (uint32_t i = 0; i < kBlockSize; i++) { 925 const uint32_t reference_output = UINT32_C(0x00000000); 926 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 927 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 928 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 929 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 930 } 931 } 932 } 933 TEST(EXP__AVX2_RR2_LUT8_P3_PERM,positive_overflow)934 TEST(EXP__AVX2_RR2_LUT8_P3_PERM, positive_overflow) { 935 TEST_REQUIRES_X86_AVX2; 936 937 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 938 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 939 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 940 for (uint32_t i = 0; i < kBlockSize; i++) { 941 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 942 } 943 xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 944 for (uint32_t i = 0; i < kBlockSize; i++) { 945 const uint32_t reference_output = UINT32_C(0x7F800000); 946 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 947 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 948 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 949 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 950 } 951 } 952 } 953 TEST(EXP__AVX2_RR2_LUT8_P3_PERM,positive_nan)954 TEST(EXP__AVX2_RR2_LUT8_P3_PERM, positive_nan) { 955 TEST_REQUIRES_X86_AVX2; 956 957 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 958 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 959 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 960 for (uint32_t i = 0; i < kBlockSize; i++) { 961 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 962 } 963 xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 964 for (uint32_t i = 0; i < kBlockSize; i++) { 965 ASSERT_TRUE(std::isnan(outputs[i])) 966 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 967 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 968 } 969 } 970 } 971 TEST(EXP__AVX2_RR2_LUT8_P3_PERM,negative_nan)972 TEST(EXP__AVX2_RR2_LUT8_P3_PERM, negative_nan) { 973 TEST_REQUIRES_X86_AVX2; 974 975 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 976 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 977 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 978 for (uint32_t i = 0; i < kBlockSize; i++) { 979 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 980 } 981 xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 982 for (uint32_t i = 0; i < kBlockSize; i++) { 983 ASSERT_TRUE(std::isnan(outputs[i])) 984 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 985 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 986 } 987 } 988 } 989 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 990 991 992 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX2_RR2_LUT8_P4_PERM,negative_zero)993 TEST(EXP__AVX2_RR2_LUT8_P4_PERM, negative_zero) { 994 TEST_REQUIRES_X86_AVX2; 995 996 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 997 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 998 std::fill(inputs.begin(), inputs.end(), -0.0f); 999 xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1000 const float reference_output = 1.0f; 1001 ASSERT_EQ(reference_output, outputs[0]) 1002 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1003 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1004 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1005 } 1006 TEST(EXP__AVX2_RR2_LUT8_P4_PERM,positive_zero)1007 TEST(EXP__AVX2_RR2_LUT8_P4_PERM, positive_zero) { 1008 TEST_REQUIRES_X86_AVX2; 1009 1010 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1011 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1012 std::fill(inputs.begin(), inputs.end(), +0.0f); 1013 xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1014 const float reference_output = 1.0f; 1015 ASSERT_EQ(reference_output, outputs[0]) 1016 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1017 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1018 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1019 } 1020 TEST(EXP__AVX2_RR2_LUT8_P4_PERM,negative_saturation)1021 TEST(EXP__AVX2_RR2_LUT8_P4_PERM, negative_saturation) { 1022 TEST_REQUIRES_X86_AVX2; 1023 1024 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1025 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1026 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 1027 for (uint32_t i = 0; i < kBlockSize; i++) { 1028 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 1029 } 1030 xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1031 for (uint32_t i = 0; i < kBlockSize; i++) { 1032 const uint32_t reference_output = UINT32_C(0x00000000); 1033 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1034 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1035 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1036 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1037 } 1038 } 1039 } 1040 TEST(EXP__AVX2_RR2_LUT8_P4_PERM,positive_overflow)1041 TEST(EXP__AVX2_RR2_LUT8_P4_PERM, positive_overflow) { 1042 TEST_REQUIRES_X86_AVX2; 1043 1044 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1045 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1046 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 1047 for (uint32_t i = 0; i < kBlockSize; i++) { 1048 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 1049 } 1050 xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1051 for (uint32_t i = 0; i < kBlockSize; i++) { 1052 const uint32_t reference_output = UINT32_C(0x7F800000); 1053 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1054 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1055 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1056 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1057 } 1058 } 1059 } 1060 TEST(EXP__AVX2_RR2_LUT8_P4_PERM,positive_nan)1061 TEST(EXP__AVX2_RR2_LUT8_P4_PERM, positive_nan) { 1062 TEST_REQUIRES_X86_AVX2; 1063 1064 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1065 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1066 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1067 for (uint32_t i = 0; i < kBlockSize; i++) { 1068 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 1069 } 1070 xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1071 for (uint32_t i = 0; i < kBlockSize; i++) { 1072 ASSERT_TRUE(std::isnan(outputs[i])) 1073 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1074 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1075 } 1076 } 1077 } 1078 TEST(EXP__AVX2_RR2_LUT8_P4_PERM,negative_nan)1079 TEST(EXP__AVX2_RR2_LUT8_P4_PERM, negative_nan) { 1080 TEST_REQUIRES_X86_AVX2; 1081 1082 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1083 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1084 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1085 for (uint32_t i = 0; i < kBlockSize; i++) { 1086 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 1087 } 1088 xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1089 for (uint32_t i = 0; i < kBlockSize; i++) { 1090 ASSERT_TRUE(std::isnan(outputs[i])) 1091 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1092 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1093 } 1094 } 1095 } 1096 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 1097 1098 1099 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX2_RR2_P5,negative_zero)1100 TEST(EXP__AVX2_RR2_P5, negative_zero) { 1101 TEST_REQUIRES_X86_AVX2; 1102 1103 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1104 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1105 std::fill(inputs.begin(), inputs.end(), -0.0f); 1106 xnn_math_f32_exp__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1107 const float reference_output = 1.0f; 1108 ASSERT_EQ(reference_output, outputs[0]) 1109 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1110 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1111 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1112 } 1113 TEST(EXP__AVX2_RR2_P5,positive_zero)1114 TEST(EXP__AVX2_RR2_P5, positive_zero) { 1115 TEST_REQUIRES_X86_AVX2; 1116 1117 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1118 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1119 std::fill(inputs.begin(), inputs.end(), +0.0f); 1120 xnn_math_f32_exp__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1121 const float reference_output = 1.0f; 1122 ASSERT_EQ(reference_output, outputs[0]) 1123 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1124 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1125 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1126 } 1127 TEST(EXP__AVX2_RR2_P5,negative_saturation)1128 TEST(EXP__AVX2_RR2_P5, negative_saturation) { 1129 TEST_REQUIRES_X86_AVX2; 1130 1131 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1132 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1133 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 1134 for (uint32_t i = 0; i < kBlockSize; i++) { 1135 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 1136 } 1137 xnn_math_f32_exp__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1138 for (uint32_t i = 0; i < kBlockSize; i++) { 1139 const uint32_t reference_output = UINT32_C(0x00000000); 1140 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1141 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1142 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1143 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1144 } 1145 } 1146 } 1147 TEST(EXP__AVX2_RR2_P5,positive_overflow)1148 TEST(EXP__AVX2_RR2_P5, positive_overflow) { 1149 TEST_REQUIRES_X86_AVX2; 1150 1151 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1152 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1153 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 1154 for (uint32_t i = 0; i < kBlockSize; i++) { 1155 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 1156 } 1157 xnn_math_f32_exp__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1158 for (uint32_t i = 0; i < kBlockSize; i++) { 1159 const uint32_t reference_output = UINT32_C(0x7F800000); 1160 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1161 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1162 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1163 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1164 } 1165 } 1166 } 1167 TEST(EXP__AVX2_RR2_P5,positive_nan)1168 TEST(EXP__AVX2_RR2_P5, positive_nan) { 1169 TEST_REQUIRES_X86_AVX2; 1170 1171 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1172 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1173 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1174 for (uint32_t i = 0; i < kBlockSize; i++) { 1175 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 1176 } 1177 xnn_math_f32_exp__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1178 for (uint32_t i = 0; i < kBlockSize; i++) { 1179 ASSERT_TRUE(std::isnan(outputs[i])) 1180 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1181 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1182 } 1183 } 1184 } 1185 TEST(EXP__AVX2_RR2_P5,negative_nan)1186 TEST(EXP__AVX2_RR2_P5, negative_nan) { 1187 TEST_REQUIRES_X86_AVX2; 1188 1189 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1190 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1191 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1192 for (uint32_t i = 0; i < kBlockSize; i++) { 1193 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 1194 } 1195 xnn_math_f32_exp__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1196 for (uint32_t i = 0; i < kBlockSize; i++) { 1197 ASSERT_TRUE(std::isnan(outputs[i])) 1198 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1199 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1200 } 1201 } 1202 } 1203 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 1204 1205 1206 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__AVX_RR2_P5,negative_zero)1207 TEST(EXP__AVX_RR2_P5, negative_zero) { 1208 TEST_REQUIRES_X86_AVX; 1209 1210 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1211 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1212 std::fill(inputs.begin(), inputs.end(), -0.0f); 1213 xnn_math_f32_exp__avx_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1214 const float reference_output = 1.0f; 1215 ASSERT_EQ(reference_output, outputs[0]) 1216 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1217 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1218 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1219 } 1220 TEST(EXP__AVX_RR2_P5,positive_zero)1221 TEST(EXP__AVX_RR2_P5, positive_zero) { 1222 TEST_REQUIRES_X86_AVX; 1223 1224 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1225 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1226 std::fill(inputs.begin(), inputs.end(), +0.0f); 1227 xnn_math_f32_exp__avx_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1228 const float reference_output = 1.0f; 1229 ASSERT_EQ(reference_output, outputs[0]) 1230 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1231 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1232 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1233 } 1234 TEST(EXP__AVX_RR2_P5,negative_saturation)1235 TEST(EXP__AVX_RR2_P5, negative_saturation) { 1236 TEST_REQUIRES_X86_AVX; 1237 1238 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1239 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1240 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 1241 for (uint32_t i = 0; i < kBlockSize; i++) { 1242 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 1243 } 1244 xnn_math_f32_exp__avx_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1245 for (uint32_t i = 0; i < kBlockSize; i++) { 1246 const uint32_t reference_output = UINT32_C(0x00000000); 1247 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1248 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1249 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1250 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1251 } 1252 } 1253 } 1254 TEST(EXP__AVX_RR2_P5,positive_overflow)1255 TEST(EXP__AVX_RR2_P5, positive_overflow) { 1256 TEST_REQUIRES_X86_AVX; 1257 1258 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1259 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1260 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 1261 for (uint32_t i = 0; i < kBlockSize; i++) { 1262 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 1263 } 1264 xnn_math_f32_exp__avx_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1265 for (uint32_t i = 0; i < kBlockSize; i++) { 1266 const uint32_t reference_output = UINT32_C(0x7F800000); 1267 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1268 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1269 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1270 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1271 } 1272 } 1273 } 1274 TEST(EXP__AVX_RR2_P5,positive_nan)1275 TEST(EXP__AVX_RR2_P5, positive_nan) { 1276 TEST_REQUIRES_X86_AVX; 1277 1278 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1279 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1280 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1281 for (uint32_t i = 0; i < kBlockSize; i++) { 1282 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 1283 } 1284 xnn_math_f32_exp__avx_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1285 for (uint32_t i = 0; i < kBlockSize; i++) { 1286 ASSERT_TRUE(std::isnan(outputs[i])) 1287 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1288 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1289 } 1290 } 1291 } 1292 TEST(EXP__AVX_RR2_P5,negative_nan)1293 TEST(EXP__AVX_RR2_P5, negative_nan) { 1294 TEST_REQUIRES_X86_AVX; 1295 1296 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1297 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1298 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1299 for (uint32_t i = 0; i < kBlockSize; i++) { 1300 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 1301 } 1302 xnn_math_f32_exp__avx_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1303 for (uint32_t i = 0; i < kBlockSize; i++) { 1304 ASSERT_TRUE(std::isnan(outputs[i])) 1305 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1306 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1307 } 1308 } 1309 } 1310 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 1311 1312 1313 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__SSE2_RR2_LUT64_P2,negative_zero)1314 TEST(EXP__SSE2_RR2_LUT64_P2, negative_zero) { 1315 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1316 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1317 std::fill(inputs.begin(), inputs.end(), -0.0f); 1318 xnn_math_f32_exp__sse2_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1319 const float reference_output = 1.0f; 1320 ASSERT_EQ(reference_output, outputs[0]) 1321 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1322 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1323 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1324 } 1325 TEST(EXP__SSE2_RR2_LUT64_P2,positive_zero)1326 TEST(EXP__SSE2_RR2_LUT64_P2, positive_zero) { 1327 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1328 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1329 std::fill(inputs.begin(), inputs.end(), +0.0f); 1330 xnn_math_f32_exp__sse2_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1331 const float reference_output = 1.0f; 1332 ASSERT_EQ(reference_output, outputs[0]) 1333 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1334 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1335 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1336 } 1337 TEST(EXP__SSE2_RR2_LUT64_P2,negative_saturation)1338 TEST(EXP__SSE2_RR2_LUT64_P2, negative_saturation) { 1339 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1340 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1341 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 1342 for (uint32_t i = 0; i < kBlockSize; i++) { 1343 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 1344 } 1345 xnn_math_f32_exp__sse2_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1346 for (uint32_t i = 0; i < kBlockSize; i++) { 1347 const uint32_t reference_output = UINT32_C(0x00000000); 1348 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1349 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1350 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1351 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1352 } 1353 } 1354 } 1355 TEST(EXP__SSE2_RR2_LUT64_P2,positive_overflow)1356 TEST(EXP__SSE2_RR2_LUT64_P2, positive_overflow) { 1357 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1358 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1359 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 1360 for (uint32_t i = 0; i < kBlockSize; i++) { 1361 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 1362 } 1363 xnn_math_f32_exp__sse2_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1364 for (uint32_t i = 0; i < kBlockSize; i++) { 1365 const uint32_t reference_output = UINT32_C(0x7F800000); 1366 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1367 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1368 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1369 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1370 } 1371 } 1372 } 1373 TEST(EXP__SSE2_RR2_LUT64_P2,positive_nan)1374 TEST(EXP__SSE2_RR2_LUT64_P2, positive_nan) { 1375 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1376 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1377 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1378 for (uint32_t i = 0; i < kBlockSize; i++) { 1379 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 1380 } 1381 xnn_math_f32_exp__sse2_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1382 for (uint32_t i = 0; i < kBlockSize; i++) { 1383 ASSERT_TRUE(std::isnan(outputs[i])) 1384 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1385 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1386 } 1387 } 1388 } 1389 TEST(EXP__SSE2_RR2_LUT64_P2,negative_nan)1390 TEST(EXP__SSE2_RR2_LUT64_P2, negative_nan) { 1391 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1392 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1393 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1394 for (uint32_t i = 0; i < kBlockSize; i++) { 1395 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 1396 } 1397 xnn_math_f32_exp__sse2_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1398 for (uint32_t i = 0; i < kBlockSize; i++) { 1399 ASSERT_TRUE(std::isnan(outputs[i])) 1400 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1401 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1402 } 1403 } 1404 } 1405 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 1406 1407 1408 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(EXP__SSE2_RR2_P5,negative_zero)1409 TEST(EXP__SSE2_RR2_P5, negative_zero) { 1410 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1411 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1412 std::fill(inputs.begin(), inputs.end(), -0.0f); 1413 xnn_math_f32_exp__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1414 const float reference_output = 1.0f; 1415 ASSERT_EQ(reference_output, outputs[0]) 1416 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1417 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1418 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1419 } 1420 TEST(EXP__SSE2_RR2_P5,positive_zero)1421 TEST(EXP__SSE2_RR2_P5, positive_zero) { 1422 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1423 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1424 std::fill(inputs.begin(), inputs.end(), +0.0f); 1425 xnn_math_f32_exp__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1426 const float reference_output = 1.0f; 1427 ASSERT_EQ(reference_output, outputs[0]) 1428 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0]) 1429 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output) 1430 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1431 } 1432 TEST(EXP__SSE2_RR2_P5,negative_saturation)1433 TEST(EXP__SSE2_RR2_P5, negative_saturation) { 1434 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1435 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1436 for (uint32_t n = UINT32_C(0xC2CFF1B5); n <= UINT32_C(0xFF800000); n += kBlockSize) { 1437 for (uint32_t i = 0; i < kBlockSize; i++) { 1438 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000))); 1439 } 1440 xnn_math_f32_exp__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1441 for (uint32_t i = 0; i < kBlockSize; i++) { 1442 const uint32_t reference_output = UINT32_C(0x00000000); 1443 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1444 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1445 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1446 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1447 } 1448 } 1449 } 1450 TEST(EXP__SSE2_RR2_P5,positive_overflow)1451 TEST(EXP__SSE2_RR2_P5, positive_overflow) { 1452 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1453 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1454 for (uint32_t n = UINT32_C(0x42B17218); n <= UINT32_C(0x7F800000); n += kBlockSize) { 1455 for (uint32_t i = 0; i < kBlockSize; i++) { 1456 inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7F800000))); 1457 } 1458 xnn_math_f32_exp__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1459 for (uint32_t i = 0; i < kBlockSize; i++) { 1460 const uint32_t reference_output = UINT32_C(0x7F800000); 1461 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1462 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1463 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1464 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1465 } 1466 } 1467 } 1468 TEST(EXP__SSE2_RR2_P5,positive_nan)1469 TEST(EXP__SSE2_RR2_P5, positive_nan) { 1470 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1471 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1472 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1473 for (uint32_t i = 0; i < kBlockSize; i++) { 1474 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i)); 1475 } 1476 xnn_math_f32_exp__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1477 for (uint32_t i = 0; i < kBlockSize; i++) { 1478 ASSERT_TRUE(std::isnan(outputs[i])) 1479 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1480 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1481 } 1482 } 1483 } 1484 TEST(EXP__SSE2_RR2_P5,negative_nan)1485 TEST(EXP__SSE2_RR2_P5, negative_nan) { 1486 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 1487 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1488 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) { 1489 for (uint32_t i = 0; i < kBlockSize; i++) { 1490 inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i))); 1491 } 1492 xnn_math_f32_exp__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1493 for (uint32_t i = 0; i < kBlockSize; i++) { 1494 ASSERT_TRUE(std::isnan(outputs[i])) 1495 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i]) 1496 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1497 } 1498 } 1499 } 1500 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 1501