xref: /aosp_15_r20/external/XNNPACK/eval/f32-exp.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker // Copyright 2020 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker //
3*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <algorithm>
7*4bdc9457SAndroid Build Coastguard Worker #include <cmath>
8*4bdc9457SAndroid Build Coastguard Worker #include <cstddef>
9*4bdc9457SAndroid Build Coastguard Worker #include <cstdint>
10*4bdc9457SAndroid Build Coastguard Worker #include <cstdlib>
11*4bdc9457SAndroid Build Coastguard Worker #include <iomanip>
12*4bdc9457SAndroid Build Coastguard Worker #include <ios>
13*4bdc9457SAndroid Build Coastguard Worker #include <vector>
14*4bdc9457SAndroid Build Coastguard Worker 
15*4bdc9457SAndroid Build Coastguard Worker #include <gtest/gtest.h>
16*4bdc9457SAndroid Build Coastguard Worker 
17*4bdc9457SAndroid Build Coastguard Worker #include <fp16.h>
18*4bdc9457SAndroid Build Coastguard Worker 
19*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/aligned-allocator.h>
20*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
21*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/isa-checks.h>
22*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h>
23*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math-stubs.h>
24*4bdc9457SAndroid Build Coastguard Worker 
25*4bdc9457SAndroid Build Coastguard Worker 
// Number of float elements in every input/output buffer below; each kernel
// invocation in this file processes kBlockSize * sizeof(float) bytes.
26*4bdc9457SAndroid Build Coastguard Worker constexpr int kBlockSize = 1024;
27*4bdc9457SAndroid Build Coastguard Worker 
28*4bdc9457SAndroid Build Coastguard Worker 
29*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(EXP__NEONFMA_RR2_LUT64_P2, negative_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Run the kernel over a full block of -0.0f inputs. Only the first output
  // element is inspected, and it must compare equal to 1.0f.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize, -0.0f);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_exp__neonfma_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
43*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_LUT64_P2, positive_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Run the kernel over a full block of +0.0f inputs. Only the first output
  // element is inspected, and it must compare equal to 1.0f.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize, +0.0f);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_exp__neonfma_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
57*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_LUT64_P2, negative_saturation) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Sweep every input bit pattern from 0xC2CFF1B5 up to 0xFF800000 (negative
  // infinity), one block per kernel call. Every output must have the bit
  // pattern 0x00000000 (+0.0f).
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0xC2CFF1B5);
  constexpr uint32_t kLastInputBits = UINT32_C(0xFF800000);
  constexpr uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstInputBits; block_start <= kLastInputBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last pattern of the range.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(input_bits, kLastInputBits));
      input_bits += 1;
    }
    xnn_math_f32_exp__neonfma_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
77*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_LUT64_P2, positive_overflow) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Sweep every input bit pattern from 0x42B17218 up to 0x7F800000 (positive
  // infinity), one block per kernel call. Every output must have the bit
  // pattern 0x7F800000 (positive infinity).
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0x42B17218);
  constexpr uint32_t kLastInputBits = UINT32_C(0x7F800000);
  constexpr uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstInputBits; block_start <= kLastInputBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last pattern of the range.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(input_bits, kLastInputBits));
      input_bits += 1;
    }
    xnn_math_f32_exp__neonfma_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
97*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_LUT64_P2, positive_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Sweep the positive NaN bit patterns 0x7F800001 through 0x7FFFFFFF, one
  // block per kernel call. Every output must itself be a NaN.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstNaNBits = UINT32_C(0x7F800001);
  constexpr uint32_t kLastNaNBits = UINT32_C(0x7FFFFFFF);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstNaNBits; block_start <= kLastNaNBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last NaN pattern.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(kLastNaNBits, input_bits));
      input_bits += 1;
    }
    xnn_math_f32_exp__neonfma_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
115*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_LUT64_P2, negative_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Sweep the negative NaN bit patterns 0xFF800001 through 0xFFFFFFFF, one
  // block per kernel call. Every output must itself be a NaN.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude first and set the sign bit afterwards. Clamping a
      // pattern that already has the sign bit set against 0x7FFFFFFF would
      // collapse every input to the single positive NaN 0x7FFFFFFF, so no
      // negative NaN would ever reach the kernel.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
133*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
134*4bdc9457SAndroid Build Coastguard Worker 
135*4bdc9457SAndroid Build Coastguard Worker 
136*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(EXP__NEONFMA_RR2_P5, negative_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Run the kernel over a full block of -0.0f inputs. Only the first output
  // element is inspected, and it must compare equal to 1.0f.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize, -0.0f);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_exp__neonfma_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
150*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_P5, positive_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Run the kernel over a full block of +0.0f inputs. Only the first output
  // element is inspected, and it must compare equal to 1.0f.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize, +0.0f);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_exp__neonfma_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
164*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_P5, negative_saturation) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Sweep every input bit pattern from 0xC2CFF1B5 up to 0xFF800000 (negative
  // infinity), one block per kernel call. Every output must have the bit
  // pattern 0x00000000 (+0.0f).
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0xC2CFF1B5);
  constexpr uint32_t kLastInputBits = UINT32_C(0xFF800000);
  constexpr uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstInputBits; block_start <= kLastInputBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last pattern of the range.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(input_bits, kLastInputBits));
      input_bits += 1;
    }
    xnn_math_f32_exp__neonfma_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
184*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_P5, positive_overflow) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Sweep every input bit pattern from 0x42B17218 up to 0x7F800000 (positive
  // infinity), one block per kernel call. Every output must have the bit
  // pattern 0x7F800000 (positive infinity).
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0x42B17218);
  constexpr uint32_t kLastInputBits = UINT32_C(0x7F800000);
  constexpr uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstInputBits; block_start <= kLastInputBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last pattern of the range.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(input_bits, kLastInputBits));
      input_bits += 1;
    }
    xnn_math_f32_exp__neonfma_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
204*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_P5, positive_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Sweep the positive NaN bit patterns 0x7F800001 through 0x7FFFFFFF, one
  // block per kernel call. Every output must itself be a NaN.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstNaNBits = UINT32_C(0x7F800001);
  constexpr uint32_t kLastNaNBits = UINT32_C(0x7FFFFFFF);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstNaNBits; block_start <= kLastNaNBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last NaN pattern.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(kLastNaNBits, input_bits));
      input_bits += 1;
    }
    xnn_math_f32_exp__neonfma_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
222*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__NEONFMA_RR2_P5, negative_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  // Sweep the negative NaN bit patterns 0xFF800001 through 0xFFFFFFFF, one
  // block per kernel call. Every output must itself be a NaN.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude first and set the sign bit afterwards. Clamping a
      // pattern that already has the sign bit set against 0x7FFFFFFF would
      // collapse every input to the single positive NaN 0x7FFFFFFF, so no
      // negative NaN would ever reach the kernel.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
240*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
241*4bdc9457SAndroid Build Coastguard Worker 
242*4bdc9457SAndroid Build Coastguard Worker 
243*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, negative_zero) {
  TEST_REQUIRES_X86_AVX512F;

  // Run the kernel over a full block of -0.0f inputs. Only the first output
  // element is inspected, and it must compare equal to 1.0f.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize, -0.0f);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
257*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, positive_zero) {
  TEST_REQUIRES_X86_AVX512F;

  // Run the kernel over a full block of +0.0f inputs. Only the first output
  // element is inspected, and it must compare equal to 1.0f.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize, +0.0f);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
271*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, negative_saturation) {
  TEST_REQUIRES_X86_AVX512F;

  // Sweep every input bit pattern from 0xC2CFF1B5 up to 0xFF800000 (negative
  // infinity), one block per kernel call. Every output must have the bit
  // pattern 0x00000000 (+0.0f).
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0xC2CFF1B5);
  constexpr uint32_t kLastInputBits = UINT32_C(0xFF800000);
  constexpr uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstInputBits; block_start <= kLastInputBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last pattern of the range.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(input_bits, kLastInputBits));
      input_bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
291*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, positive_overflow) {
  TEST_REQUIRES_X86_AVX512F;

  // Sweep every input bit pattern from 0x42B17218 up to 0x7F800000 (positive
  // infinity), one block per kernel call. Every output must have the bit
  // pattern 0x7F800000 (positive infinity).
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0x42B17218);
  constexpr uint32_t kLastInputBits = UINT32_C(0x7F800000);
  constexpr uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstInputBits; block_start <= kLastInputBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last pattern of the range.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(input_bits, kLastInputBits));
      input_bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
311*4bdc9457SAndroid Build Coastguard Worker 
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, positive_nan) {
  TEST_REQUIRES_X86_AVX512F;

  // Sweep the positive NaN bit patterns 0x7F800001 through 0x7FFFFFFF, one
  // block per kernel call. Every output must itself be a NaN.
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstNaNBits = UINT32_C(0x7F800001);
  constexpr uint32_t kLastNaNBits = UINT32_C(0x7FFFFFFF);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = kFirstNaNBits; block_start <= kLastNaNBits; block_start += kBlockSize) {
    // Consecutive bit patterns; the tail of the final block is padded by
    // clamping to the last NaN pattern.
    uint32_t input_bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(kLastNaNBits, input_bits));
      input_bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (size_t i = 0; i < outputs.size(); i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
329*4bdc9457SAndroid Build Coastguard Worker 
// Feeds every negative NaN bit pattern (0xFF800001 through 0xFFFFFFFF) to the kernel,
// kBlockSize values per call, and requires every output to be NaN.
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM, negative_nan) {
  TEST_REQUIRES_X86_AVX512F;

  constexpr uint32_t kSignBit = UINT32_C(0x80000000);
  constexpr uint32_t kLastNaNMagnitude = UINT32_C(0x7FFFFFFF);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude first and only then set the sign bit. Clamping after OR-ing in the
      // sign bit always collapses to 0x7FFFFFFF (any value with the sign bit set exceeds it),
      // which would leave the negative NaN range completely untested.
      inputs[i] = uint32_as_float(kSignBit | std::min<uint32_t>(kLastNaNMagnitude, n + i));
    }
    xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
347*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
348*4bdc9457SAndroid Build Coastguard Worker 
349*4bdc9457SAndroid Build Coastguard Worker 
350*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel on a block filled with -0.0f and checks that the first output equals 1.0f.
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, negative_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloats inputs(kBlockSize, -0.0f);
  AlignedFloats outputs(kBlockSize);
  xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
364*4bdc9457SAndroid Build Coastguard Worker 
// Runs the kernel on a block filled with +0.0f and checks that the first output equals 1.0f.
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, positive_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloats inputs(kBlockSize, +0.0f);
  AlignedFloats outputs(kBlockSize);
  xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
378*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps inputs whose bit patterns run from 0xC2CFF1B5 up to 0xFF800000 (negative infinity),
// kBlockSize values per call, and requires each output to be exactly +0.0f (bits 0x00000000).
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, negative_saturation) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0xC2CFF1B5);
  constexpr uint32_t kLastInputBits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstInputBits; base <= kLastInputBits; base += kBlockSize) {
    // Clamp so the last batch repeats the final pattern instead of running past it.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(bits, kLastInputBits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
398*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps inputs whose bit patterns run from 0x42B17218 up to 0x7F800000 (positive infinity),
// kBlockSize values per call, and requires each output to be exactly +inf (bits 0x7F800000).
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, positive_overflow) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0x42B17218);
  constexpr uint32_t kLastInputBits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstInputBits; base <= kLastInputBits; base += kBlockSize) {
    // Clamp so the last batch repeats the final pattern instead of running into the NaN range.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(bits, kLastInputBits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
418*4bdc9457SAndroid Build Coastguard Worker 
// Feeds every positive NaN bit pattern (0x7F800001 through 0x7FFFFFFF) to the kernel,
// kBlockSize values per call, and requires every output to be NaN.
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, positive_nan) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstNaNBits = UINT32_C(0x7F800001);
  constexpr uint32_t kLastNaNBits = UINT32_C(0x7FFFFFFF);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstNaNBits; base <= kLastNaNBits; base += kBlockSize) {
    // The last batch can run past the final NaN pattern, so clamp rather than spill into the sign bit.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(kLastNaNBits, bits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
436*4bdc9457SAndroid Build Coastguard Worker 
// Feeds every negative NaN bit pattern (0xFF800001 through 0xFFFFFFFF) to the kernel,
// kBlockSize values per call, and requires every output to be NaN.
TEST(EXP__AVX512F_RR2_LUT16_P3_PERM_SCALEF, negative_nan) {
  TEST_REQUIRES_X86_AVX512F;

  constexpr uint32_t kSignBit = UINT32_C(0x80000000);
  constexpr uint32_t kLastNaNMagnitude = UINT32_C(0x7FFFFFFF);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude first and only then set the sign bit. Clamping after OR-ing in the
      // sign bit always collapses to 0x7FFFFFFF (any value with the sign bit set exceeds it),
      // which would leave the negative NaN range completely untested.
      inputs[i] = uint32_as_float(kSignBit | std::min<uint32_t>(kLastNaNMagnitude, n + i));
    }
    xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
454*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
455*4bdc9457SAndroid Build Coastguard Worker 
456*4bdc9457SAndroid Build Coastguard Worker 
457*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel on a block filled with -0.0f and checks that the first output equals 1.0f.
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, negative_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloats inputs(kBlockSize, -0.0f);
  AlignedFloats outputs(kBlockSize);
  xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
471*4bdc9457SAndroid Build Coastguard Worker 
// Runs the kernel on a block filled with +0.0f and checks that the first output equals 1.0f.
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, positive_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloats inputs(kBlockSize, +0.0f);
  AlignedFloats outputs(kBlockSize);
  xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
485*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps inputs whose bit patterns run from 0xC2CFF1B5 up to 0xFF800000 (negative infinity),
// kBlockSize values per call, and requires each output to be exactly +0.0f (bits 0x00000000).
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, negative_saturation) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0xC2CFF1B5);
  constexpr uint32_t kLastInputBits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstInputBits; base <= kLastInputBits; base += kBlockSize) {
    // Clamp so the last batch repeats the final pattern instead of running past it.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(bits, kLastInputBits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
505*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps inputs whose bit patterns run from 0x42B17218 up to 0x7F800000 (positive infinity),
// kBlockSize values per call, and requires each output to be exactly +inf (bits 0x7F800000).
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, positive_overflow) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0x42B17218);
  constexpr uint32_t kLastInputBits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstInputBits; base <= kLastInputBits; base += kBlockSize) {
    // Clamp so the last batch repeats the final pattern instead of running into the NaN range.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(bits, kLastInputBits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
525*4bdc9457SAndroid Build Coastguard Worker 
// Feeds every positive NaN bit pattern (0x7F800001 through 0x7FFFFFFF) to the kernel,
// kBlockSize values per call, and requires every output to be NaN.
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, positive_nan) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstNaNBits = UINT32_C(0x7F800001);
  constexpr uint32_t kLastNaNBits = UINT32_C(0x7FFFFFFF);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstNaNBits; base <= kLastNaNBits; base += kBlockSize) {
    // The last batch can run past the final NaN pattern, so clamp rather than spill into the sign bit.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(kLastNaNBits, bits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
543*4bdc9457SAndroid Build Coastguard Worker 
// Feeds every negative NaN bit pattern (0xFF800001 through 0xFFFFFFFF) to the kernel,
// kBlockSize values per call, and requires every output to be NaN.
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2, negative_nan) {
  TEST_REQUIRES_X86_AVX512F;

  constexpr uint32_t kSignBit = UINT32_C(0x80000000);
  constexpr uint32_t kLastNaNMagnitude = UINT32_C(0x7FFFFFFF);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude first and only then set the sign bit. Clamping after OR-ing in the
      // sign bit always collapses to 0x7FFFFFFF (any value with the sign bit set exceeds it),
      // which would leave the negative NaN range completely untested.
      inputs[i] = uint32_as_float(kSignBit | std::min<uint32_t>(kLastNaNMagnitude, n + i));
    }
    xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
561*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
562*4bdc9457SAndroid Build Coastguard Worker 
563*4bdc9457SAndroid Build Coastguard Worker 
564*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel on a block filled with -0.0f and checks that the first output equals 1.0f.
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, negative_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloats inputs(kBlockSize, -0.0f);
  AlignedFloats outputs(kBlockSize);
  xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
578*4bdc9457SAndroid Build Coastguard Worker 
// Runs the kernel on a block filled with +0.0f and checks that the first output equals 1.0f.
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, positive_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloats inputs(kBlockSize, +0.0f);
  AlignedFloats outputs(kBlockSize);
  xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
592*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps inputs whose bit patterns run from 0xC2CFF1B5 up to 0xFF800000 (negative infinity),
// kBlockSize values per call, and requires each output to be exactly +0.0f (bits 0x00000000).
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, negative_saturation) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0xC2CFF1B5);
  constexpr uint32_t kLastInputBits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstInputBits; base <= kLastInputBits; base += kBlockSize) {
    // Clamp so the last batch repeats the final pattern instead of running past it.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(bits, kLastInputBits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
612*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps inputs whose bit patterns run from 0x42B17218 up to 0x7F800000 (positive infinity),
// kBlockSize values per call, and requires each output to be exactly +inf (bits 0x7F800000).
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, positive_overflow) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstInputBits = UINT32_C(0x42B17218);
  constexpr uint32_t kLastInputBits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstInputBits; base <= kLastInputBits; base += kBlockSize) {
    // Clamp so the last batch repeats the final pattern instead of running into the NaN range.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(bits, kLastInputBits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
632*4bdc9457SAndroid Build Coastguard Worker 
// Feeds every positive NaN bit pattern (0x7F800001 through 0x7FFFFFFF) to the kernel,
// kBlockSize values per call, and requires every output to be NaN.
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, positive_nan) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  constexpr uint32_t kFirstNaNBits = UINT32_C(0x7F800001);
  constexpr uint32_t kLastNaNBits = UINT32_C(0x7FFFFFFF);

  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t base = kFirstNaNBits; base <= kLastNaNBits; base += kBlockSize) {
    // The last batch can run past the final NaN pattern, so clamp rather than spill into the sign bit.
    uint32_t bits = base;
    for (float& value : inputs) {
      value = uint32_as_float(std::min<uint32_t>(kLastNaNBits, bits));
      bits += 1;
    }
    xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
650*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0xFF800001 through 0xFFFFFFFF (the negative NaN encodings of IEEE 754
// binary32) in blocks of kBlockSize and requires every kernel output to be a NaN.
TEST(EXP__AVX512F_RR2_LUT32_P2_PERM2_SCALEF, negative_nan) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // n enumerates the magnitude bits (sign bit clear); the sign bit is OR-ed in per element below.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude to 0x7FFFFFFF first, then set the sign bit. Clamping after setting the sign
      // bit would collapse every input to 0x7FFFFFFF (a positive NaN), because any value with bit 31 set
      // is greater than 0x7FFFFFFF, and no negative NaN would ever be tested.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
668*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
669*4bdc9457SAndroid Build Coastguard Worker 
670*4bdc9457SAndroid Build Coastguard Worker 
671*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel over a block filled with -0.0f and requires the first output to compare equal to 1.0f.
TEST(EXP__AVX512F_RR2_P5, negative_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (float& input : inputs) {
    input = -0.0f;
  }
  xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Every element holds the same input; only element 0 is compared against the reference.
  const auto input_bits = float_as_uint32(inputs[0]);
  const auto reference_bits = float_as_uint32(reference_output);
  const auto output_bits = float_as_uint32(outputs[0]);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
685*4bdc9457SAndroid Build Coastguard Worker 
// Runs the kernel over a block filled with +0.0f and requires the first output to compare equal to 1.0f.
TEST(EXP__AVX512F_RR2_P5, positive_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (float& input : inputs) {
    input = +0.0f;
  }
  xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Every element holds the same input; only element 0 is compared against the reference.
  const auto input_bits = float_as_uint32(inputs[0]);
  const auto reference_bits = float_as_uint32(reference_output);
  const auto output_bits = float_as_uint32(outputs[0]);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
699*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps input bit patterns from 0xC2CFF1B5 up to 0xFF800000 (-infinity in IEEE 754 binary32) in blocks
// of kBlockSize and requires every output to have the bit pattern 0x00000000.
TEST(EXP__AVX512F_RR2_P5, negative_saturation) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0xC2CFF1B5);
  const uint32_t last_input_bits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // Patterns that would run past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, last_input_bits));
      bits++;
    }
    xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
719*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps input bit patterns from 0x42B17218 up to 0x7F800000 (+infinity in IEEE 754 binary32) in blocks
// of kBlockSize and requires every output to have the bit pattern 0x7F800000.
TEST(EXP__AVX512F_RR2_P5, positive_overflow) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0x42B17218);
  const uint32_t last_input_bits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // Patterns that would run past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, last_input_bits));
      bits++;
    }
    xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
739*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0x7F800001 through 0x7FFFFFFF (the positive NaN encodings of IEEE 754
// binary32) in blocks of kBlockSize and requires every kernel output to be a NaN.
TEST(EXP__AVX512F_RR2_P5, positive_nan) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0x7F800001);
  const uint32_t last_input_bits = UINT32_C(0x7FFFFFFF);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // The sweep is not block-aligned: patterns past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(last_input_bits, bits));
      bits++;
    }
    xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
757*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0xFF800001 through 0xFFFFFFFF (the negative NaN encodings of IEEE 754
// binary32) in blocks of kBlockSize and requires every kernel output to be a NaN.
TEST(EXP__AVX512F_RR2_P5, negative_nan) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // n enumerates the magnitude bits (sign bit clear); the sign bit is OR-ed in per element below.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude to 0x7FFFFFFF first, then set the sign bit. Clamping after setting the sign
      // bit would collapse every input to 0x7FFFFFFF (a positive NaN), because any value with bit 31 set
      // is greater than 0x7FFFFFFF, and no negative NaN would ever be tested.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__avx512f_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
775*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
776*4bdc9457SAndroid Build Coastguard Worker 
777*4bdc9457SAndroid Build Coastguard Worker 
778*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel over a block filled with -0.0f and requires the first output to compare equal to 1.0f.
TEST(EXP__AVX512F_RR2_P5_SCALEF, negative_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (float& input : inputs) {
    input = -0.0f;
  }
  xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Every element holds the same input; only element 0 is compared against the reference.
  const auto input_bits = float_as_uint32(inputs[0]);
  const auto reference_bits = float_as_uint32(reference_output);
  const auto output_bits = float_as_uint32(outputs[0]);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
792*4bdc9457SAndroid Build Coastguard Worker 
// Runs the kernel over a block filled with +0.0f and requires the first output to compare equal to 1.0f.
TEST(EXP__AVX512F_RR2_P5_SCALEF, positive_zero) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (float& input : inputs) {
    input = +0.0f;
  }
  xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Every element holds the same input; only element 0 is compared against the reference.
  const auto input_bits = float_as_uint32(inputs[0]);
  const auto reference_bits = float_as_uint32(reference_output);
  const auto output_bits = float_as_uint32(outputs[0]);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
806*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps input bit patterns from 0xC2CFF1B5 up to 0xFF800000 (-infinity in IEEE 754 binary32) in blocks
// of kBlockSize and requires every output to have the bit pattern 0x00000000.
TEST(EXP__AVX512F_RR2_P5_SCALEF, negative_saturation) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0xC2CFF1B5);
  const uint32_t last_input_bits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // Patterns that would run past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, last_input_bits));
      bits++;
    }
    xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
826*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps input bit patterns from 0x42B17218 up to 0x7F800000 (+infinity in IEEE 754 binary32) in blocks
// of kBlockSize and requires every output to have the bit pattern 0x7F800000.
TEST(EXP__AVX512F_RR2_P5_SCALEF, positive_overflow) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0x42B17218);
  const uint32_t last_input_bits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // Patterns that would run past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, last_input_bits));
      bits++;
    }
    xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
846*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0x7F800001 through 0x7FFFFFFF (the positive NaN encodings of IEEE 754
// binary32) in blocks of kBlockSize and requires every kernel output to be a NaN.
TEST(EXP__AVX512F_RR2_P5_SCALEF, positive_nan) {
  TEST_REQUIRES_X86_AVX512F;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0x7F800001);
  const uint32_t last_input_bits = UINT32_C(0x7FFFFFFF);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // The sweep is not block-aligned: patterns past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(last_input_bits, bits));
      bits++;
    }
    xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
864*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0xFF800001 through 0xFFFFFFFF (the negative NaN encodings of IEEE 754
// binary32) in blocks of kBlockSize and requires every kernel output to be a NaN.
TEST(EXP__AVX512F_RR2_P5_SCALEF, negative_nan) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // n enumerates the magnitude bits (sign bit clear); the sign bit is OR-ed in per element below.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude to 0x7FFFFFFF first, then set the sign bit. Clamping after setting the sign
      // bit would collapse every input to 0x7FFFFFFF (a positive NaN), because any value with bit 31 set
      // is greater than 0x7FFFFFFF, and no negative NaN would ever be tested.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__avx512f_rr2_p5_scalef(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
882*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
883*4bdc9457SAndroid Build Coastguard Worker 
884*4bdc9457SAndroid Build Coastguard Worker 
885*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel over a block filled with -0.0f and requires the first output to compare equal to 1.0f.
TEST(EXP__AVX2_RR2_LUT8_P3_PERM, negative_zero) {
  TEST_REQUIRES_X86_AVX2;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (float& input : inputs) {
    input = -0.0f;
  }
  xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Every element holds the same input; only element 0 is compared against the reference.
  const auto input_bits = float_as_uint32(inputs[0]);
  const auto reference_bits = float_as_uint32(reference_output);
  const auto output_bits = float_as_uint32(outputs[0]);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
899*4bdc9457SAndroid Build Coastguard Worker 
// Runs the kernel over a block filled with +0.0f and requires the first output to compare equal to 1.0f.
TEST(EXP__AVX2_RR2_LUT8_P3_PERM, positive_zero) {
  TEST_REQUIRES_X86_AVX2;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float reference_output = 1.0f;

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (float& input : inputs) {
    input = +0.0f;
  }
  xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Every element holds the same input; only element 0 is compared against the reference.
  const auto input_bits = float_as_uint32(inputs[0]);
  const auto reference_bits = float_as_uint32(reference_output);
  const auto output_bits = float_as_uint32(outputs[0]);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
913*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps input bit patterns from 0xC2CFF1B5 up to 0xFF800000 (-infinity in IEEE 754 binary32) in blocks
// of kBlockSize and requires every output to have the bit pattern 0x00000000.
TEST(EXP__AVX2_RR2_LUT8_P3_PERM, negative_saturation) {
  TEST_REQUIRES_X86_AVX2;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0xC2CFF1B5);
  const uint32_t last_input_bits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // Patterns that would run past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, last_input_bits));
      bits++;
    }
    xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
933*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps input bit patterns from 0x42B17218 up to 0x7F800000 (+infinity in IEEE 754 binary32) in blocks
// of kBlockSize and requires every output to have the bit pattern 0x7F800000.
TEST(EXP__AVX2_RR2_LUT8_P3_PERM, positive_overflow) {
  TEST_REQUIRES_X86_AVX2;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0x42B17218);
  const uint32_t last_input_bits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // Patterns that would run past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, last_input_bits));
      bits++;
    }
    xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
953*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0x7F800001 through 0x7FFFFFFF (the positive NaN encodings of IEEE 754
// binary32) in blocks of kBlockSize and requires every kernel output to be a NaN.
TEST(EXP__AVX2_RR2_LUT8_P3_PERM, positive_nan) {
  TEST_REQUIRES_X86_AVX2;

  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const uint32_t first_input_bits = UINT32_C(0x7F800001);
  const uint32_t last_input_bits = UINT32_C(0x7FFFFFFF);

  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = first_input_bits; block_start <= last_input_bits; block_start += kBlockSize) {
    // The sweep is not block-aligned: patterns past the last input are clamped to it.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(last_input_bits, bits));
      bits++;
    }
    xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const auto input_bits = float_as_uint32(inputs[i]);
      const auto output_bits = float_as_uint32(outputs[i]);
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
971*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the negative NaN bit patterns 0xFF800001..0xFFFFFFFF (sign bit set,
// exponent field all ones, non-zero mantissa) through
// xnn_math_f32_exp__avx2_rr2_lut8_p3_perm, one block of kBlockSize elements at
// a time, and requires every output to be NaN.
TEST(EXP__AVX2_RR2_LUT8_P3_PERM, negative_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude bits first and only then set the sign bit. Setting
      // the sign bit before the clamp makes std::min always return 0x7FFFFFFF,
      // so the test would only ever exercise a single positive NaN.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__avx2_rr2_lut8_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
989*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
990*4bdc9457SAndroid Build Coastguard Worker 
991*4bdc9457SAndroid Build Coastguard Worker 
992*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_math_f32_exp__avx2_rr2_lut8_p4_perm on a block filled with -0.0f
// and checks that the first output compares equal to 1.0f.
TEST(EXP__AVX2_RR2_LUT8_P4_PERM, negative_zero) {
  TEST_REQUIRES_X86_AVX2;

  const float reference_output = 1.0f;
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (float& value : inputs) {
    value = -0.0f;
  }
  xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, and only element 0 of the result is inspected.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1006*4bdc9457SAndroid Build Coastguard Worker 
// Runs xnn_math_f32_exp__avx2_rr2_lut8_p4_perm on a block filled with +0.0f
// and checks that the first output compares equal to 1.0f.
TEST(EXP__AVX2_RR2_LUT8_P4_PERM, positive_zero) {
  TEST_REQUIRES_X86_AVX2;

  const float reference_output = 1.0f;
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (float& value : inputs) {
    value = +0.0f;
  }
  xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, and only element 0 of the result is inspected.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1020*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps every bit pattern from 0xC2CFF1B5 up to 0xFF800000 (the binary32
// encoding of negative infinity) through
// xnn_math_f32_exp__avx2_rr2_lut8_p4_perm, one block of kBlockSize elements at
// a time, and requires each output to have the bit pattern 0x00000000.
TEST(EXP__AVX2_RR2_LUT8_P4_PERM, negative_saturation) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);
  for (uint32_t block_start = UINT32_C(0xC2CFF1B5); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(block_start + k, last_bits);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1040*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps every bit pattern from 0x42B17218 up to 0x7F800000 (the binary32
// encoding of positive infinity) through
// xnn_math_f32_exp__avx2_rr2_lut8_p4_perm, one block of kBlockSize elements at
// a time, and requires each output to have the bit pattern 0x7F800000.
TEST(EXP__AVX2_RR2_LUT8_P4_PERM, positive_overflow) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);
  for (uint32_t block_start = UINT32_C(0x42B17218); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(block_start + k, last_bits);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1060*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the bit patterns 0x7F800001..0x7FFFFFFF (sign bit clear, exponent
// field all ones, non-zero mantissa: the positive NaN encodings of IEEE
// binary32) through xnn_math_f32_exp__avx2_rr2_lut8_p4_perm, one block of
// kBlockSize elements at a time, and requires every output to be NaN.
TEST(EXP__AVX2_RR2_LUT8_P4_PERM, positive_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0x7FFFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800001); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(last_bits, block_start + k);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1078*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the negative NaN bit patterns 0xFF800001..0xFFFFFFFF (sign bit set,
// exponent field all ones, non-zero mantissa) through
// xnn_math_f32_exp__avx2_rr2_lut8_p4_perm, one block of kBlockSize elements at
// a time, and requires every output to be NaN.
TEST(EXP__AVX2_RR2_LUT8_P4_PERM, negative_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude bits first and only then set the sign bit. Setting
      // the sign bit before the clamp makes std::min always return 0x7FFFFFFF,
      // so the test would only ever exercise a single positive NaN.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__avx2_rr2_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1096*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1097*4bdc9457SAndroid Build Coastguard Worker 
1098*4bdc9457SAndroid Build Coastguard Worker 
1099*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_math_f32_exp__avx2_rr2_p5 on a block filled with -0.0f and checks
// that the first output compares equal to 1.0f.
TEST(EXP__AVX2_RR2_P5, negative_zero) {
  TEST_REQUIRES_X86_AVX2;

  const float reference_output = 1.0f;
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (float& value : inputs) {
    value = -0.0f;
  }
  xnn_math_f32_exp__avx2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, and only element 0 of the result is inspected.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1113*4bdc9457SAndroid Build Coastguard Worker 
// Runs xnn_math_f32_exp__avx2_rr2_p5 on a block filled with +0.0f and checks
// that the first output compares equal to 1.0f.
TEST(EXP__AVX2_RR2_P5, positive_zero) {
  TEST_REQUIRES_X86_AVX2;

  const float reference_output = 1.0f;
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (float& value : inputs) {
    value = +0.0f;
  }
  xnn_math_f32_exp__avx2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, and only element 0 of the result is inspected.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1127*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps every bit pattern from 0xC2CFF1B5 up to 0xFF800000 (the binary32
// encoding of negative infinity) through xnn_math_f32_exp__avx2_rr2_p5, one
// block of kBlockSize elements at a time, and requires each output to have the
// bit pattern 0x00000000.
TEST(EXP__AVX2_RR2_P5, negative_saturation) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);
  for (uint32_t block_start = UINT32_C(0xC2CFF1B5); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(block_start + k, last_bits);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1147*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps every bit pattern from 0x42B17218 up to 0x7F800000 (the binary32
// encoding of positive infinity) through xnn_math_f32_exp__avx2_rr2_p5, one
// block of kBlockSize elements at a time, and requires each output to have the
// bit pattern 0x7F800000.
TEST(EXP__AVX2_RR2_P5, positive_overflow) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);
  for (uint32_t block_start = UINT32_C(0x42B17218); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(block_start + k, last_bits);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1167*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the bit patterns 0x7F800001..0x7FFFFFFF (sign bit clear, exponent
// field all ones, non-zero mantissa: the positive NaN encodings of IEEE
// binary32) through xnn_math_f32_exp__avx2_rr2_p5, one block of kBlockSize
// elements at a time, and requires every output to be NaN.
TEST(EXP__AVX2_RR2_P5, positive_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0x7FFFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800001); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(last_bits, block_start + k);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1185*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the negative NaN bit patterns 0xFF800001..0xFFFFFFFF (sign bit set,
// exponent field all ones, non-zero mantissa) through
// xnn_math_f32_exp__avx2_rr2_p5, one block of kBlockSize elements at a time,
// and requires every output to be NaN.
TEST(EXP__AVX2_RR2_P5, negative_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude bits first and only then set the sign bit. Setting
      // the sign bit before the clamp makes std::min always return 0x7FFFFFFF,
      // so the test would only ever exercise a single positive NaN.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1203*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1204*4bdc9457SAndroid Build Coastguard Worker 
1205*4bdc9457SAndroid Build Coastguard Worker 
1206*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_math_f32_exp__avx_rr2_p5 on a block filled with -0.0f and checks
// that the first output compares equal to 1.0f.
TEST(EXP__AVX_RR2_P5, negative_zero) {
  TEST_REQUIRES_X86_AVX;

  const float reference_output = 1.0f;
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (float& value : inputs) {
    value = -0.0f;
  }
  xnn_math_f32_exp__avx_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, and only element 0 of the result is inspected.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1220*4bdc9457SAndroid Build Coastguard Worker 
// Runs xnn_math_f32_exp__avx_rr2_p5 on a block filled with +0.0f and checks
// that the first output compares equal to 1.0f.
TEST(EXP__AVX_RR2_P5, positive_zero) {
  TEST_REQUIRES_X86_AVX;

  const float reference_output = 1.0f;
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (float& value : inputs) {
    value = +0.0f;
  }
  xnn_math_f32_exp__avx_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, and only element 0 of the result is inspected.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1234*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps every bit pattern from 0xC2CFF1B5 up to 0xFF800000 (the binary32
// encoding of negative infinity) through xnn_math_f32_exp__avx_rr2_p5, one
// block of kBlockSize elements at a time, and requires each output to have the
// bit pattern 0x00000000.
TEST(EXP__AVX_RR2_P5, negative_saturation) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);
  for (uint32_t block_start = UINT32_C(0xC2CFF1B5); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(block_start + k, last_bits);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1254*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps every bit pattern from 0x42B17218 up to 0x7F800000 (the binary32
// encoding of positive infinity) through xnn_math_f32_exp__avx_rr2_p5, one
// block of kBlockSize elements at a time, and requires each output to have the
// bit pattern 0x7F800000.
TEST(EXP__AVX_RR2_P5, positive_overflow) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);
  for (uint32_t block_start = UINT32_C(0x42B17218); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(block_start + k, last_bits);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1274*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the bit patterns 0x7F800001..0x7FFFFFFF (sign bit clear, exponent
// field all ones, non-zero mantissa: the positive NaN encodings of IEEE
// binary32) through xnn_math_f32_exp__avx_rr2_p5, one block of kBlockSize
// elements at a time, and requires every output to be NaN.
TEST(EXP__AVX_RR2_P5, positive_nan) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const uint32_t last_bits = UINT32_C(0x7FFFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800001); block_start <= last_bits; block_start += kBlockSize) {
    // A block may run past the end of the range; clamp so the tail repeats the last pattern.
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t bits = std::min<uint32_t>(last_bits, block_start + k);
      inputs[k] = uint32_as_float(bits);
    }
    xnn_math_f32_exp__avx_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1292*4bdc9457SAndroid Build Coastguard Worker 
// Feeds the AVX rr2-p5 exp kernel the negative NaN bit patterns
// 0xFF800001..0xFFFFFFFF (sign bit set, all exponent bits set, non-zero
// mantissa) in batches of kBlockSize and requires a NaN result for each.
TEST(EXP__AVX_RR2_P5, negative_nan) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // n enumerates the magnitude bits; the sign bit is OR-ed in when the input is built.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude to the last NaN pattern FIRST, then set the sign bit.
      // Clamping after OR-ing in 0x80000000 would collapse every input to
      // 0x7FFFFFFF (a positive NaN), since any value with bit 31 set is
      // greater than 0x7FFFFFFF, and no negative NaN would ever be tested.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__avx_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1310*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1311*4bdc9457SAndroid Build Coastguard Worker 
1312*4bdc9457SAndroid Build Coastguard Worker 
1313*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE2 rr2-lut64-p2 exp kernel on a batch filled with -0.0f and
// requires the first output element to equal 1.0f.
TEST(EXP__SSE2_RR2_LUT64_P2, negative_zero) {
  // Every element of the input batch holds the same -0.0f value.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, -0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_exp__sse2_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  // Only element 0 of the result is inspected.
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1325*4bdc9457SAndroid Build Coastguard Worker 
// Runs the SSE2 rr2-lut64-p2 exp kernel on a batch filled with +0.0f and
// requires the first output element to equal 1.0f.
TEST(EXP__SSE2_RR2_LUT64_P2, positive_zero) {
  // Every element of the input batch holds the same +0.0f value.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, +0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_exp__sse2_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  // Only element 0 of the result is inspected.
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1337*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0xC2CFF1B5..0xFF800000 in batches of
// kBlockSize and requires the SSE2 rr2-lut64-p2 exp kernel to return the
// exact bit pattern 0x00000000 for every one of them.
TEST(EXP__SSE2_RR2_LUT64_P2, negative_saturation) {
  constexpr uint32_t kLastPattern = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xC2CFF1B5); block_start <= kLastPattern; block_start += kBlockSize) {
    // The final batch overshoots the range; the tail is clamped to the last pattern.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, kLastPattern));
      bits++;
    }

    xnn_math_f32_exp__sse2_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1355*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0x42B17218..0x7F800000 in batches of
// kBlockSize and requires the SSE2 rr2-lut64-p2 exp kernel to return the
// exact bit pattern 0x7F800000 for every one of them.
TEST(EXP__SSE2_RR2_LUT64_P2, positive_overflow) {
  constexpr uint32_t kLastPattern = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x42B17218); block_start <= kLastPattern; block_start += kBlockSize) {
    // The final batch overshoots the range; the tail is clamped to the last pattern.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, kLastPattern));
      bits++;
    }

    xnn_math_f32_exp__sse2_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1373*4bdc9457SAndroid Build Coastguard Worker 
// Walks the bit patterns 0x7F800001..0x7FFFFFFF (sign bit clear, all exponent
// bits set, non-zero mantissa) in batches of kBlockSize and requires the SSE2
// rr2-lut64-p2 exp kernel to produce a NaN for every one of them.
TEST(EXP__SSE2_RR2_LUT64_P2, positive_nan) {
  constexpr uint32_t kLastPattern = UINT32_C(0x7FFFFFFF);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800001); block_start <= kLastPattern; block_start += kBlockSize) {
    // The final batch runs one pattern past the range; clamp it to the last pattern.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(kLastPattern, bits));
      bits++;
    }

    xnn_math_f32_exp__sse2_rr2_lut64_p2(inputs.size() * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < outputs.size(); i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1389*4bdc9457SAndroid Build Coastguard Worker 
// Feeds the SSE2 rr2-lut64-p2 exp kernel the negative NaN bit patterns
// 0xFF800001..0xFFFFFFFF (sign bit set, all exponent bits set, non-zero
// mantissa) in batches of kBlockSize and requires a NaN result for each.
TEST(EXP__SSE2_RR2_LUT64_P2, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // n enumerates the magnitude bits; the sign bit is OR-ed in when the input is built.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude to the last NaN pattern FIRST, then set the sign bit.
      // Clamping after OR-ing in 0x80000000 would collapse every input to
      // 0x7FFFFFFF (a positive NaN), since any value with bit 31 set is
      // greater than 0x7FFFFFFF, and no negative NaN would ever be tested.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__sse2_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1405*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1406*4bdc9457SAndroid Build Coastguard Worker 
1407*4bdc9457SAndroid Build Coastguard Worker 
1408*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE2 rr2-p5 exp kernel on a batch filled with -0.0f and requires
// the first output element to equal 1.0f.
TEST(EXP__SSE2_RR2_P5, negative_zero) {
  // Every element of the input batch holds the same -0.0f value.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, -0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_exp__sse2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  // Only element 0 of the result is inspected.
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1420*4bdc9457SAndroid Build Coastguard Worker 
// Runs the SSE2 rr2-p5 exp kernel on a batch filled with +0.0f and requires
// the first output element to equal 1.0f.
TEST(EXP__SSE2_RR2_P5, positive_zero) {
  // Every element of the input batch holds the same +0.0f value.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, +0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_exp__sse2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());

  // Only element 0 of the result is inspected.
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1432*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0xC2CFF1B5..0xFF800000 in batches of
// kBlockSize and requires the SSE2 rr2-p5 exp kernel to return the exact bit
// pattern 0x00000000 for every one of them.
TEST(EXP__SSE2_RR2_P5, negative_saturation) {
  constexpr uint32_t kLastPattern = UINT32_C(0xFF800000);
  const uint32_t reference_output = UINT32_C(0x00000000);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xC2CFF1B5); block_start <= kLastPattern; block_start += kBlockSize) {
    // The final batch overshoots the range; the tail is clamped to the last pattern.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, kLastPattern));
      bits++;
    }

    xnn_math_f32_exp__sse2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1450*4bdc9457SAndroid Build Coastguard Worker 
// Sweeps the input bit patterns 0x42B17218..0x7F800000 in batches of
// kBlockSize and requires the SSE2 rr2-p5 exp kernel to return the exact bit
// pattern 0x7F800000 for every one of them.
TEST(EXP__SSE2_RR2_P5, positive_overflow) {
  constexpr uint32_t kLastPattern = UINT32_C(0x7F800000);
  const uint32_t reference_output = UINT32_C(0x7F800000);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x42B17218); block_start <= kLastPattern; block_start += kBlockSize) {
    // The final batch overshoots the range; the tail is clamped to the last pattern.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(bits, kLastPattern));
      bits++;
    }

    xnn_math_f32_exp__sse2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < outputs.size(); i++) {
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1468*4bdc9457SAndroid Build Coastguard Worker 
// Walks the bit patterns 0x7F800001..0x7FFFFFFF (sign bit clear, all exponent
// bits set, non-zero mantissa) in batches of kBlockSize and requires the SSE2
// rr2-p5 exp kernel to produce a NaN for every one of them.
TEST(EXP__SSE2_RR2_P5, positive_nan) {
  constexpr uint32_t kLastPattern = UINT32_C(0x7FFFFFFF);

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800001); block_start <= kLastPattern; block_start += kBlockSize) {
    // The final batch runs one pattern past the range; clamp it to the last pattern.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::min<uint32_t>(kLastPattern, bits));
      bits++;
    }

    xnn_math_f32_exp__sse2_rr2_p5(inputs.size() * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < outputs.size(); i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1484*4bdc9457SAndroid Build Coastguard Worker 
// Feeds the SSE2 rr2-p5 exp kernel the negative NaN bit patterns
// 0xFF800001..0xFFFFFFFF (sign bit set, all exponent bits set, non-zero
// mantissa) in batches of kBlockSize and requires a NaN result for each.
TEST(EXP__SSE2_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // n enumerates the magnitude bits; the sign bit is OR-ed in when the input is built.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the magnitude to the last NaN pattern FIRST, then set the sign bit.
      // Clamping after OR-ing in 0x80000000 would collapse every input to
      // 0x7FFFFFFF (a positive NaN), since any value with bit 31 set is
      // greater than 0x7FFFFFFF, and no negative NaN would ever be tested.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_exp__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1500*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1501