// xref: /aosp_15_r20/external/XNNPACK/eval/f32-expm1minus.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

6 #include <algorithm>
7 #include <cmath>
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstdlib>
11 #include <iomanip>
12 #include <ios>
13 #include <vector>
14 
15 #include <gtest/gtest.h>
16 
17 #include <fp16.h>
18 
19 #include <xnnpack/aligned-allocator.h>
20 #include <xnnpack/common.h>
21 #include <xnnpack/isa-checks.h>
22 #include <xnnpack/math.h>
23 #include <xnnpack/math-stubs.h>
24 
25 
26 constexpr int kBlockSize = 1024;
27 
28 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // expm1f(-0.0f) must evaluate to +0.0f: the kernel may not pass the sign of
  // a zero input straight through to the output.
  TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_zero) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Every input from 0xC18AA123 down to -infinity (0xFF800000) must saturate
  // to exactly -1.0f.
  TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_saturation) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Clamp so the tail of the last block does not run past -infinity.
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, positive_nan) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every negative NaN input (bit patterns 0xFF800001..0xFFFFFFFF) must
  // produce a NaN output.  The sign bit is OR-ed in *after* clamping n + i to
  // 0x7FFFFFFF: OR-ing before the clamp would make the clamp always win and
  // collapse every input to the same positive NaN.
  TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_nan) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // expm1f(-0.0f) must evaluate to +0.0f: the kernel may not pass the sign of
  // a zero input straight through to the output.
  TEST(EXPM1MINUS__NEON_RR2_P6, negative_zero) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Every input from 0xC18AA123 down to -infinity (0xFF800000) must saturate
  // to exactly -1.0f.
  TEST(EXPM1MINUS__NEON_RR2_P6, negative_saturation) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Clamp so the tail of the last block does not run past -infinity.
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  TEST(EXPM1MINUS__NEON_RR2_P6, positive_nan) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every negative NaN input (bit patterns 0xFF800001..0xFFFFFFFF) must
  // produce a NaN output.  The sign bit is OR-ed in *after* clamping n + i to
  // 0x7FFFFFFF: OR-ing before the clamp would make the clamp always win and
  // collapse every input to the same positive NaN.
  TEST(EXPM1MINUS__NEON_RR2_P6, negative_nan) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // expm1f(-0.0f) must evaluate to +0.0f: the kernel may not pass the sign of
  // a zero input straight through to the output.
  TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Every input from 0xC18AA123 down to -infinity (0xFF800000) must saturate
  // to exactly -1.0f.
  TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Clamp so the tail of the last block does not run past -infinity.
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every negative NaN input (bit patterns 0xFF800001..0xFFFFFFFF) must
  // produce a NaN output.  The sign bit is OR-ed in *after* clamping n + i to
  // 0x7FFFFFFF: OR-ing before the clamp would make the clamp always win and
  // collapse every input to the same positive NaN.
  TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // expm1f(-0.0f) must evaluate to +0.0f: the kernel may not pass the sign of
  // a zero input straight through to the output.
  TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Every input from 0xC18AA123 down to -infinity (0xFF800000) must saturate
  // to exactly -1.0f.
  TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Clamp so the tail of the last block does not run past -infinity.
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  TEST(EXPM1MINUS__NEONFMA_RR1_P6, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every negative NaN input (bit patterns 0xFF800001..0xFFFFFFFF) must
  // produce a NaN output.  The sign bit is OR-ed in *after* clamping n + i to
  // 0x7FFFFFFF: OR-ing before the clamp would make the clamp always win and
  // collapse every input to the same positive NaN.
  TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // expm1f(-0.0f) must evaluate to +0.0f: the kernel may not pass the sign of
  // a zero input straight through to the output.
  TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_zero) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Every input from 0xC18AA123 down to -infinity (0xFF800000) must saturate
  // to exactly -1.0f.
  TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_saturation) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Clamp so the tail of the last block does not run past -infinity.
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, positive_nan) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every negative NaN input (bit patterns 0xFF800001..0xFFFFFFFF) must
  // produce a NaN output.  The sign bit is OR-ed in *after* clamping n + i to
  // 0x7FFFFFFF: OR-ing before the clamp would make the clamp always win and
  // collapse every input to the same positive NaN.
  TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_nan) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // expm1f(-0.0f) must evaluate to +0.0f: the kernel may not pass the sign of
  // a zero input straight through to the output.
  TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_zero) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Every input from 0xC18AA123 down to -infinity (0xFF800000) must saturate
  // to exactly -1.0f.
  TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_saturation) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Clamp so the tail of the last block does not run past -infinity.
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  TEST(EXPM1MINUS__AVX512F_RR1_P6, positive_nan) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every negative NaN input (bit patterns 0xFF800001..0xFFFFFFFF) must
  // produce a NaN output.  The sign bit is OR-ed in *after* clamping n + i to
  // 0x7FFFFFFF: OR-ing before the clamp would make the clamp always win and
  // collapse every input to the same positive NaN.
  TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_nan) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // expm1f(-0.0f) must evaluate to +0.0f: the kernel may not pass the sign of
  // a zero input straight through to the output.
  TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Every input from 0xC18AA123 down to -infinity (0xFF800000) must saturate
  // to exactly -1.0f.
  TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_saturation) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Clamp so the tail of the last block does not run past -infinity.
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, positive_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Every negative NaN input (bit patterns 0xFF800001..0xFFFFFFFF) must
  // produce a NaN output.  The sign bit is OR-ed in *after* clamping n + i to
  // 0x7FFFFFFF: OR-ing before the clamp would make the clamp always win and
  // collapse every input to the same positive NaN.
  TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

540 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM,negative_zero)541   TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_zero) {
542     TEST_REQUIRES_X86_AVX2;
543 
544     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
545     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
546     std::fill(inputs.begin(), inputs.end(), -0.0f);
547     xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
548     const float reference_output = 0.0f;
549     ASSERT_EQ(reference_output, outputs[0])
550       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
551       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
552       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
553   }
554 
TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM,negative_saturation)555   TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_saturation) {
556     TEST_REQUIRES_X86_AVX2;
557 
558     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
559     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
560     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
561       for (uint32_t i = 0; i < kBlockSize; i++) {
562         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
563       }
564       xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
565       for (uint32_t i = 0; i < kBlockSize; i++) {
566         const float reference_output = -1.0f;
567         ASSERT_EQ(reference_output, outputs[i])
568           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
569           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
570           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
571       }
572     }
573   }
574 
TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM,positive_nan)575   TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, positive_nan) {
576     TEST_REQUIRES_X86_AVX2;
577 
578     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
579     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
580     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
581       for (uint32_t i = 0; i < kBlockSize; i++) {
582         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
583       }
584       xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
585       for (uint32_t i = 0; i < kBlockSize; i++) {
586         ASSERT_TRUE(std::isnan(outputs[i]))
587           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
588           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
589       }
590     }
591   }
592 
TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM,negative_nan)593   TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_nan) {
594     TEST_REQUIRES_X86_AVX2;
595 
596     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
597     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
598     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
599       for (uint32_t i = 0; i < kBlockSize; i++) {
600         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
601       }
602       xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
603       for (uint32_t i = 0; i < kBlockSize; i++) {
604         ASSERT_TRUE(std::isnan(outputs[i]))
605           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
606           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
607       }
608     }
609   }
610 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
611 
612 
613 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER,negative_zero)614   TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_zero) {
615     TEST_REQUIRES_X86_AVX2;
616 
617     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
618     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
619     std::fill(inputs.begin(), inputs.end(), -0.0f);
620     xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
621     const float reference_output = 0.0f;
622     ASSERT_EQ(reference_output, outputs[0])
623       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
624       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
625       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
626   }
627 
TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER,negative_saturation)628   TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_saturation) {
629     TEST_REQUIRES_X86_AVX2;
630 
631     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
632     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
633     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
634       for (uint32_t i = 0; i < kBlockSize; i++) {
635         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
636       }
637       xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
638       for (uint32_t i = 0; i < kBlockSize; i++) {
639         const float reference_output = -1.0f;
640         ASSERT_EQ(reference_output, outputs[i])
641           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
642           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
643           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
644       }
645     }
646   }
647 
TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER,positive_nan)648   TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, positive_nan) {
649     TEST_REQUIRES_X86_AVX2;
650 
651     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
652     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
653     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
654       for (uint32_t i = 0; i < kBlockSize; i++) {
655         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
656       }
657       xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
658       for (uint32_t i = 0; i < kBlockSize; i++) {
659         ASSERT_TRUE(std::isnan(outputs[i]))
660           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
661           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
662       }
663     }
664   }
665 
TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER,negative_nan)666   TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_nan) {
667     TEST_REQUIRES_X86_AVX2;
668 
669     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
670     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
671     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
672       for (uint32_t i = 0; i < kBlockSize; i++) {
673         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
674       }
675       xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
676       for (uint32_t i = 0; i < kBlockSize; i++) {
677         ASSERT_TRUE(std::isnan(outputs[i]))
678           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
679           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
680       }
681     }
682   }
683 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
684 
685 
686 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX2_RR1_P6,negative_zero)687   TEST(EXPM1MINUS__AVX2_RR1_P6, negative_zero) {
688     TEST_REQUIRES_X86_AVX2;
689 
690     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
691     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
692     std::fill(inputs.begin(), inputs.end(), -0.0f);
693     xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
694     const float reference_output = 0.0f;
695     ASSERT_EQ(reference_output, outputs[0])
696       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
697       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
698       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
699   }
700 
TEST(EXPM1MINUS__AVX2_RR1_P6,negative_saturation)701   TEST(EXPM1MINUS__AVX2_RR1_P6, negative_saturation) {
702     TEST_REQUIRES_X86_AVX2;
703 
704     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
705     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
706     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
707       for (uint32_t i = 0; i < kBlockSize; i++) {
708         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
709       }
710       xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
711       for (uint32_t i = 0; i < kBlockSize; i++) {
712         const float reference_output = -1.0f;
713         ASSERT_EQ(reference_output, outputs[i])
714           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
715           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
716           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
717       }
718     }
719   }
720 
TEST(EXPM1MINUS__AVX2_RR1_P6,positive_nan)721   TEST(EXPM1MINUS__AVX2_RR1_P6, positive_nan) {
722     TEST_REQUIRES_X86_AVX2;
723 
724     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
725     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
726     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
727       for (uint32_t i = 0; i < kBlockSize; i++) {
728         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
729       }
730       xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
731       for (uint32_t i = 0; i < kBlockSize; i++) {
732         ASSERT_TRUE(std::isnan(outputs[i]))
733           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
734           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
735       }
736     }
737   }
738 
TEST(EXPM1MINUS__AVX2_RR1_P6,negative_nan)739   TEST(EXPM1MINUS__AVX2_RR1_P6, negative_nan) {
740     TEST_REQUIRES_X86_AVX2;
741 
742     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
743     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
744     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
745       for (uint32_t i = 0; i < kBlockSize; i++) {
746         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
747       }
748       xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
749       for (uint32_t i = 0; i < kBlockSize; i++) {
750         ASSERT_TRUE(std::isnan(outputs[i]))
751           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
752           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
753       }
754     }
755   }
756 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
757 
758 
759 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM,negative_zero)760   TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_zero) {
761     TEST_REQUIRES_X86_AVX;
762 
763     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
764     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
765     std::fill(inputs.begin(), inputs.end(), -0.0f);
766     xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
767     const float reference_output = 0.0f;
768     ASSERT_EQ(reference_output, outputs[0])
769       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
770       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
771       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
772   }
773 
TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM,negative_saturation)774   TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_saturation) {
775     TEST_REQUIRES_X86_AVX;
776 
777     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
778     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
779     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
780       for (uint32_t i = 0; i < kBlockSize; i++) {
781         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
782       }
783       xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
784       for (uint32_t i = 0; i < kBlockSize; i++) {
785         const float reference_output = -1.0f;
786         ASSERT_EQ(reference_output, outputs[i])
787           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
788           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
789           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
790       }
791     }
792   }
793 
TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM,positive_nan)794   TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, positive_nan) {
795     TEST_REQUIRES_X86_AVX;
796 
797     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
798     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
799     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
800       for (uint32_t i = 0; i < kBlockSize; i++) {
801         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
802       }
803       xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
804       for (uint32_t i = 0; i < kBlockSize; i++) {
805         ASSERT_TRUE(std::isnan(outputs[i]))
806           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
807           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
808       }
809     }
810   }
811 
TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM,negative_nan)812   TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_nan) {
813     TEST_REQUIRES_X86_AVX;
814 
815     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
816     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
817     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
818       for (uint32_t i = 0; i < kBlockSize; i++) {
819         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
820       }
821       xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
822       for (uint32_t i = 0; i < kBlockSize; i++) {
823         ASSERT_TRUE(std::isnan(outputs[i]))
824           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
825           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
826       }
827     }
828   }
829 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
830 
831 
832 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX_RR2_LUT16_P3,negative_zero)833   TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_zero) {
834     TEST_REQUIRES_X86_AVX;
835 
836     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
837     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
838     std::fill(inputs.begin(), inputs.end(), -0.0f);
839     xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
840     const float reference_output = 0.0f;
841     ASSERT_EQ(reference_output, outputs[0])
842       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
843       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
844       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
845   }
846 
TEST(EXPM1MINUS__AVX_RR2_LUT16_P3,negative_saturation)847   TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_saturation) {
848     TEST_REQUIRES_X86_AVX;
849 
850     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
851     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
852     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
853       for (uint32_t i = 0; i < kBlockSize; i++) {
854         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
855       }
856       xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
857       for (uint32_t i = 0; i < kBlockSize; i++) {
858         const float reference_output = -1.0f;
859         ASSERT_EQ(reference_output, outputs[i])
860           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
861           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
862           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
863       }
864     }
865   }
866 
TEST(EXPM1MINUS__AVX_RR2_LUT16_P3,positive_nan)867   TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, positive_nan) {
868     TEST_REQUIRES_X86_AVX;
869 
870     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
871     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
872     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
873       for (uint32_t i = 0; i < kBlockSize; i++) {
874         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
875       }
876       xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
877       for (uint32_t i = 0; i < kBlockSize; i++) {
878         ASSERT_TRUE(std::isnan(outputs[i]))
879           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
880           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
881       }
882     }
883   }
884 
TEST(EXPM1MINUS__AVX_RR2_LUT16_P3,negative_nan)885   TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_nan) {
886     TEST_REQUIRES_X86_AVX;
887 
888     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
889     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
890     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
891       for (uint32_t i = 0; i < kBlockSize; i++) {
892         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
893       }
894       xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
895       for (uint32_t i = 0; i < kBlockSize; i++) {
896         ASSERT_TRUE(std::isnan(outputs[i]))
897           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
898           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
899       }
900     }
901   }
902 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
903 
904 
905 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX_RR2_P6,negative_zero)906   TEST(EXPM1MINUS__AVX_RR2_P6, negative_zero) {
907     TEST_REQUIRES_X86_AVX;
908 
909     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
910     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
911     std::fill(inputs.begin(), inputs.end(), -0.0f);
912     xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
913     const float reference_output = 0.0f;
914     ASSERT_EQ(reference_output, outputs[0])
915       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
916       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
917       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
918   }
919 
TEST(EXPM1MINUS__AVX_RR2_P6,negative_saturation)920   TEST(EXPM1MINUS__AVX_RR2_P6, negative_saturation) {
921     TEST_REQUIRES_X86_AVX;
922 
923     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
924     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
925     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
926       for (uint32_t i = 0; i < kBlockSize; i++) {
927         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
928       }
929       xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
930       for (uint32_t i = 0; i < kBlockSize; i++) {
931         const float reference_output = -1.0f;
932         ASSERT_EQ(reference_output, outputs[i])
933           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
934           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
935           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
936       }
937     }
938   }
939 
TEST(EXPM1MINUS__AVX_RR2_P6,positive_nan)940   TEST(EXPM1MINUS__AVX_RR2_P6, positive_nan) {
941     TEST_REQUIRES_X86_AVX;
942 
943     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
944     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
945     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
946       for (uint32_t i = 0; i < kBlockSize; i++) {
947         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
948       }
949       xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
950       for (uint32_t i = 0; i < kBlockSize; i++) {
951         ASSERT_TRUE(std::isnan(outputs[i]))
952           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
953           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
954       }
955     }
956   }
957 
TEST(EXPM1MINUS__AVX_RR2_P6,negative_nan)958   TEST(EXPM1MINUS__AVX_RR2_P6, negative_nan) {
959     TEST_REQUIRES_X86_AVX;
960 
961     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
962     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
963     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
964       for (uint32_t i = 0; i < kBlockSize; i++) {
965         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
966       }
967       xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
968       for (uint32_t i = 0; i < kBlockSize; i++) {
969         ASSERT_TRUE(std::isnan(outputs[i]))
970           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
971           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
972       }
973     }
974   }
975 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
976 
977 
978 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3,negative_zero)979   TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_zero) {
980     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
981     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
982     std::fill(inputs.begin(), inputs.end(), -0.0f);
983     xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
984     const float reference_output = 0.0f;
985     ASSERT_EQ(reference_output, outputs[0])
986       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
987       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
988       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
989   }
990 
TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3,negative_saturation)991   TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_saturation) {
992     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
993     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
994     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
995       for (uint32_t i = 0; i < kBlockSize; i++) {
996         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
997       }
998       xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
999       for (uint32_t i = 0; i < kBlockSize; i++) {
1000         const float reference_output = -1.0f;
1001         ASSERT_EQ(reference_output, outputs[i])
1002           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1003           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1004           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1005       }
1006     }
1007   }
1008 
TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3,positive_nan)1009   TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, positive_nan) {
1010     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1011     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1012     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1013       for (uint32_t i = 0; i < kBlockSize; i++) {
1014         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
1015       }
1016       xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1017       for (uint32_t i = 0; i < kBlockSize; i++) {
1018         ASSERT_TRUE(std::isnan(outputs[i]))
1019           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1020           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1021       }
1022     }
1023   }
1024 
TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3,negative_nan)1025   TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_nan) {
1026     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1027     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1028     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1029       for (uint32_t i = 0; i < kBlockSize; i++) {
1030         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
1031       }
1032       xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1033       for (uint32_t i = 0; i < kBlockSize; i++) {
1034         ASSERT_TRUE(std::isnan(outputs[i]))
1035           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1036           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1037       }
1038     }
1039   }
1040 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1041 
1042 
1043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__SSE2_RR2_P6,negative_zero)1044   TEST(EXPM1MINUS__SSE2_RR2_P6, negative_zero) {
1045     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1046     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1047     std::fill(inputs.begin(), inputs.end(), -0.0f);
1048     xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1049     const float reference_output = 0.0f;
1050     ASSERT_EQ(reference_output, outputs[0])
1051       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1052       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1053       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1054   }
1055 
TEST(EXPM1MINUS__SSE2_RR2_P6,negative_saturation)1056   TEST(EXPM1MINUS__SSE2_RR2_P6, negative_saturation) {
1057     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1058     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1059     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
1060       for (uint32_t i = 0; i < kBlockSize; i++) {
1061         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
1062       }
1063       xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1064       for (uint32_t i = 0; i < kBlockSize; i++) {
1065         const float reference_output = -1.0f;
1066         ASSERT_EQ(reference_output, outputs[i])
1067           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1068           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1069           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1070       }
1071     }
1072   }
1073 
TEST(EXPM1MINUS__SSE2_RR2_P6,positive_nan)1074   TEST(EXPM1MINUS__SSE2_RR2_P6, positive_nan) {
1075     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1076     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1077     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1078       for (uint32_t i = 0; i < kBlockSize; i++) {
1079         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
1080       }
1081       xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1082       for (uint32_t i = 0; i < kBlockSize; i++) {
1083         ASSERT_TRUE(std::isnan(outputs[i]))
1084           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1085           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1086       }
1087     }
1088   }
1089 
TEST(EXPM1MINUS__SSE2_RR2_P6,negative_nan)1090   TEST(EXPM1MINUS__SSE2_RR2_P6, negative_nan) {
1091     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1092     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1093     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1094       for (uint32_t i = 0; i < kBlockSize; i++) {
1095         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
1096       }
1097       xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1098       for (uint32_t i = 0; i < kBlockSize; i++) {
1099         ASSERT_TRUE(std::isnan(outputs[i]))
1100           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1101           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1102       }
1103     }
1104   }
1105 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1106 
1107 
1108 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT,negative_zero)1109   TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_zero) {
1110     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1111     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1112     std::fill(inputs.begin(), inputs.end(), -0.0f);
1113     xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1114     const float reference_output = 0.0f;
1115     ASSERT_EQ(reference_output, outputs[0])
1116       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1117       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1118       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1119   }
1120 
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT,negative_saturation)1121   TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_saturation) {
1122     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1123     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1124     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
1125       for (uint32_t i = 0; i < kBlockSize; i++) {
1126         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
1127       }
1128       xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1129       for (uint32_t i = 0; i < kBlockSize; i++) {
1130         const float reference_output = -1.0f;
1131         ASSERT_EQ(reference_output, outputs[i])
1132           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1133           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1134           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1135       }
1136     }
1137   }
1138 
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT,positive_nan)1139   TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, positive_nan) {
1140     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1141     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1142     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1143       for (uint32_t i = 0; i < kBlockSize; i++) {
1144         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
1145       }
1146       xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1147       for (uint32_t i = 0; i < kBlockSize; i++) {
1148         ASSERT_TRUE(std::isnan(outputs[i]))
1149           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1150           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1151       }
1152     }
1153   }
1154 
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT,negative_nan)1155   TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_nan) {
1156     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1157     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1158     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1159       for (uint32_t i = 0; i < kBlockSize; i++) {
1160         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
1161       }
1162       xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1163       for (uint32_t i = 0; i < kBlockSize; i++) {
1164         ASSERT_TRUE(std::isnan(outputs[i]))
1165           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1166           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1167       }
1168     }
1169   }
1170 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1171 
1172 
1173 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX,negative_zero)1174   TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_zero) {
1175     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1176     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1177     std::fill(inputs.begin(), inputs.end(), -0.0f);
1178     xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1179     const float reference_output = 0.0f;
1180     ASSERT_EQ(reference_output, outputs[0])
1181       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1182       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1183       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1184   }
1185 
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX,negative_saturation)1186   TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_saturation) {
1187     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1188     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1189     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
1190       for (uint32_t i = 0; i < kBlockSize; i++) {
1191         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
1192       }
1193       xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1194       for (uint32_t i = 0; i < kBlockSize; i++) {
1195         const float reference_output = -1.0f;
1196         ASSERT_EQ(reference_output, outputs[i])
1197           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1198           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1199           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1200       }
1201     }
1202   }
1203 
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX,positive_nan)1204   TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, positive_nan) {
1205     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1206     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1207     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1208       for (uint32_t i = 0; i < kBlockSize; i++) {
1209         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
1210       }
1211       xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1212       for (uint32_t i = 0; i < kBlockSize; i++) {
1213         ASSERT_TRUE(std::isnan(outputs[i]))
1214           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1215           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1216       }
1217     }
1218   }
1219 
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX,negative_nan)1220   TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_nan) {
1221     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1222     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1223     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1224       for (uint32_t i = 0; i < kBlockSize; i++) {
1225         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
1226       }
1227       xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1228       for (uint32_t i = 0; i < kBlockSize; i++) {
1229         ASSERT_TRUE(std::isnan(outputs[i]))
1230           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1231           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1232       }
1233     }
1234   }
1235 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1236 
1237 
1238 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT,negative_zero)1239   TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_zero) {
1240     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1241     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1242     std::fill(inputs.begin(), inputs.end(), -0.0f);
1243     xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1244     const float reference_output = 0.0f;
1245     ASSERT_EQ(reference_output, outputs[0])
1246       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1247       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1248       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1249   }
1250 
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT,negative_saturation)1251   TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_saturation) {
1252     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1253     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1254     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
1255       for (uint32_t i = 0; i < kBlockSize; i++) {
1256         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
1257       }
1258       xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1259       for (uint32_t i = 0; i < kBlockSize; i++) {
1260         const float reference_output = -1.0f;
1261         ASSERT_EQ(reference_output, outputs[i])
1262           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1263           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1264           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1265       }
1266     }
1267   }
1268 
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT,positive_nan)1269   TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, positive_nan) {
1270     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1271     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1272     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1273       for (uint32_t i = 0; i < kBlockSize; i++) {
1274         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
1275       }
1276       xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1277       for (uint32_t i = 0; i < kBlockSize; i++) {
1278         ASSERT_TRUE(std::isnan(outputs[i]))
1279           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1280           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1281       }
1282     }
1283   }
1284 
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT,negative_nan)1285   TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_nan) {
1286     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1287     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1288     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1289       for (uint32_t i = 0; i < kBlockSize; i++) {
1290         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
1291       }
1292       xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1293       for (uint32_t i = 0; i < kBlockSize; i++) {
1294         ASSERT_TRUE(std::isnan(outputs[i]))
1295           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1296           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1297       }
1298     }
1299   }
1300 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1301 
1302 
1303 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX,negative_zero)1304   TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_zero) {
1305     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1306     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1307     std::fill(inputs.begin(), inputs.end(), -0.0f);
1308     xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1309     const float reference_output = 0.0f;
1310     ASSERT_EQ(reference_output, outputs[0])
1311       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1312       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1313       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1314   }
1315 
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX,negative_saturation)1316   TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_saturation) {
1317     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1318     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1319     for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
1320       for (uint32_t i = 0; i < kBlockSize; i++) {
1321         inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
1322       }
1323       xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1324       for (uint32_t i = 0; i < kBlockSize; i++) {
1325         const float reference_output = -1.0f;
1326         ASSERT_EQ(reference_output, outputs[i])
1327           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1328           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
1329           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1330       }
1331     }
1332   }
1333 
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX,positive_nan)1334   TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, positive_nan) {
1335     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1336     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1337     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1338       for (uint32_t i = 0; i < kBlockSize; i++) {
1339         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
1340       }
1341       xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1342       for (uint32_t i = 0; i < kBlockSize; i++) {
1343         ASSERT_TRUE(std::isnan(outputs[i]))
1344           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1345           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1346       }
1347     }
1348   }
1349 
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX,negative_nan)1350   TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_nan) {
1351     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1352     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1353     for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1354       for (uint32_t i = 0; i < kBlockSize; i++) {
1355         inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
1356       }
1357       xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1358       for (uint32_t i = 0; i < kBlockSize; i++) {
1359         ASSERT_TRUE(std::isnan(outputs[i]))
1360           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1361           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1362       }
1363     }
1364   }
1365 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1366 
1367 
TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_zero) {
  // expm1(-0.0f) is -0.0f, which must compare equal to the +0.0f reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  // Check every element of the block (the original checked only outputs[0])
  // to make sure all kBlockSize outputs were actually written correctly.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, outputs[i])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
1379 
TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_saturation) {
  // Sweep negative inputs from bit pattern 0xC18AA123 down to -infinity
  // (0xFF800000); the result must saturate to exactly -1.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float expected = -1.0f;
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t bits = std::min<uint32_t>(UINT32_C(0xFF800000), n + idx);
      inputs[idx] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(expected, outputs[idx])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(expected)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1397 
TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, positive_nan) {
  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t bits = std::min<uint32_t>(n + idx, UINT32_C(0x7FFFFFFF));
      inputs[idx] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_TRUE(std::isnan(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1413 
TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_nan) {
  // Every negative NaN input must produce a NaN output.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the payload to the positive NaN range first, then set the sign
      // bit. Applying std::min after OR-ing in the sign bit (as before)
      // always yielded 0x7FFFFFFF, so no negative NaN was ever tested.
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1429 
1430 
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_zero) {
  // expm1(-0.0f) is -0.0f, which must compare equal to the +0.0f reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  // Check every element of the block (the original checked only outputs[0])
  // to make sure all kBlockSize outputs were actually written correctly.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, outputs[i])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
1442 
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_saturation) {
  // Sweep negative inputs from bit pattern 0xC18AA123 down to -infinity
  // (0xFF800000); the result must saturate to exactly -1.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float expected = -1.0f;
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t bits = std::min<uint32_t>(UINT32_C(0xFF800000), n + idx);
      inputs[idx] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(expected, outputs[idx])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(expected)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1460 
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, positive_nan) {
  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t bits = std::min<uint32_t>(n + idx, UINT32_C(0x7FFFFFFF));
      inputs[idx] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_TRUE(std::isnan(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1476 
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_nan) {
  // Every negative NaN input must produce a NaN output.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the payload to the positive NaN range first, then set the sign
      // bit. Applying std::min after OR-ing in the sign bit (as before)
      // always yielded 0x7FFFFFFF, so no negative NaN was ever tested.
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1492 
1493 
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_zero) {
  // expm1(-0.0f) is -0.0f, which must compare equal to the +0.0f reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  // Check every element of the block (the original checked only outputs[0])
  // to make sure all kBlockSize outputs were actually written correctly.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, outputs[i])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
1505 
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_saturation) {
  // Sweep negative inputs from bit pattern 0xC18AA123 down to -infinity
  // (0xFF800000); the result must saturate to exactly -1.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float expected = -1.0f;
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t bits = std::min<uint32_t>(UINT32_C(0xFF800000), n + idx);
      inputs[idx] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(expected, outputs[idx])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(expected)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1523 
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, positive_nan) {
  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t bits = std::min<uint32_t>(n + idx, UINT32_C(0x7FFFFFFF));
      inputs[idx] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_TRUE(std::isnan(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1539 
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_nan) {
  // Every negative NaN input must produce a NaN output.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp the payload to the positive NaN range first, then set the sign
      // bit. Applying std::min after OR-ing in the sign bit (as before)
      // always yielded 0x7FFFFFFF, so no negative NaN was ever tested.
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1555 
1556 
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_zero) {
  // expm1(-0.0f) is -0.0f, which must compare equal to the +0.0f reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  // Check every element of the block (the original checked only outputs[0])
  // to make sure all kBlockSize outputs were actually written correctly.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, outputs[i])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
1568 
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_saturation) {
  // Sweep negative inputs from bit pattern 0xC18AA123 down to -infinity
  // (0xFF800000); the result must saturate to exactly -1.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float expected = -1.0f;
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t bits = std::min<uint32_t>(UINT32_C(0xFF800000), n + idx);
      inputs[idx] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(expected, outputs[idx])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(expected)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1586 
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, positive_nan) {
  // Every positive NaN input (bit patterns 0x7F800001..0x7FFFFFFF) must
  // produce a NaN output.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t bits = std::min<uint32_t>(n + idx, UINT32_C(0x7FFFFFFF));
      inputs[idx] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_TRUE(std::isnan(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1602 
// Every negative NaN input must map to a NaN output.
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep all NaN payloads, then set the sign bit to form negative NaNs.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Fix: clamp the payload to 0x7FFFFFFF *before* setting the sign bit.
      // The previous order, min(0x7FFFFFFF, 0x80000000 | (n + i)), always
      // produced 0x7FFFFFFF (one positive NaN), so negative NaNs were never
      // actually exercised by this test.
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1618 
1619 
// expm1minus(-0.0f) must compare equal to 0.0f.
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_zero) {
  // Fill the whole input block with negative zero at construction time.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, -0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  // All lanes received the same input; checking the first is sufficient.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1631 
// Inputs at or below the saturation cutoff must produce exactly -1.0f.
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns from the saturation threshold down to -infinity.
  const uint32_t kNegativeInfinityBits = UINT32_C(0xFF800000);
  for (uint32_t block = UINT32_C(0xC18AA123); block <= kNegativeInfinityBits; block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the sweep never walks past -infinity into NaN encodings.
      const uint32_t bits = std::min<uint32_t>(block + offset, kNegativeInfinityBits);
      inputs[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = -1.0f;
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      ASSERT_EQ(reference_output, outputs[offset])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[offset]);
    }
  }
}
1649 
// Every positive NaN input must map to a NaN output.
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Positive NaNs occupy bit patterns 0x7F800001..0x7FFFFFFF inclusive.
  const uint32_t kMaxPositiveNaNBits = UINT32_C(0x7FFFFFFF);
  for (uint32_t block = UINT32_C(0x7F800001); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the final partial block stays inside the positive NaN range.
      inputs[offset] = uint32_as_float(std::min<uint32_t>(kMaxPositiveNaNBits, block + offset));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      ASSERT_TRUE(std::isnan(outputs[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[offset]);
    }
  }
}
1665 
// Every negative NaN input must map to a NaN output.
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep all NaN payloads, then set the sign bit to form negative NaNs.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Fix: clamp the payload to 0x7FFFFFFF *before* setting the sign bit.
      // The previous order, min(0x7FFFFFFF, 0x80000000 | (n + i)), always
      // produced 0x7FFFFFFF (one positive NaN), so negative NaNs were never
      // actually exercised by this test.
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1681 
1682 
// expm1minus(-0.0f) must compare equal to 0.0f.
TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_zero) {
  // Fill the whole input block with negative zero at construction time.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, -0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  // All lanes received the same input; checking the first is sufficient.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1694 
// Inputs at or below the saturation cutoff must produce exactly -1.0f.
TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns from the saturation threshold down to -infinity.
  const uint32_t kNegativeInfinityBits = UINT32_C(0xFF800000);
  for (uint32_t block = UINT32_C(0xC18AA123); block <= kNegativeInfinityBits; block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the sweep never walks past -infinity into NaN encodings.
      const uint32_t bits = std::min<uint32_t>(block + offset, kNegativeInfinityBits);
      inputs[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = -1.0f;
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      ASSERT_EQ(reference_output, outputs[offset])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[offset]);
    }
  }
}
1712 
// Every positive NaN input must map to a NaN output.
TEST(EXPM1MINUS__SCALAR_RR2_P5, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Positive NaNs occupy bit patterns 0x7F800001..0x7FFFFFFF inclusive.
  const uint32_t kMaxPositiveNaNBits = UINT32_C(0x7FFFFFFF);
  for (uint32_t block = UINT32_C(0x7F800001); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the final partial block stays inside the positive NaN range.
      inputs[offset] = uint32_as_float(std::min<uint32_t>(kMaxPositiveNaNBits, block + offset));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      ASSERT_TRUE(std::isnan(outputs[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[offset]);
    }
  }
}
1728 
// Every negative NaN input must map to a NaN output.
TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep all NaN payloads, then set the sign bit to form negative NaNs.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Fix: clamp the payload to 0x7FFFFFFF *before* setting the sign bit.
      // The previous order, min(0x7FFFFFFF, 0x80000000 | (n + i)), always
      // produced 0x7FFFFFFF (one positive NaN), so negative NaNs were never
      // actually exercised by this test.
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1744 
1745 
// expm1minus(-0.0f) must compare equal to 0.0f.
TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_zero) {
  // Fill the whole input block with negative zero at construction time.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, -0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  // All lanes received the same input; checking the first is sufficient.
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1757 
// Inputs at or below the saturation cutoff must produce exactly -1.0f.
TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns from the saturation threshold down to -infinity.
  const uint32_t kNegativeInfinityBits = UINT32_C(0xFF800000);
  for (uint32_t block = UINT32_C(0xC18AA123); block <= kNegativeInfinityBits; block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the sweep never walks past -infinity into NaN encodings.
      const uint32_t bits = std::min<uint32_t>(block + offset, kNegativeInfinityBits);
      inputs[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = -1.0f;
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      ASSERT_EQ(reference_output, outputs[offset])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[offset]);
    }
  }
}
1775 
// Every positive NaN input must map to a NaN output.
TEST(EXPM1MINUS__SCALAR_RR2_P6, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Positive NaNs occupy bit patterns 0x7F800001..0x7FFFFFFF inclusive.
  const uint32_t kMaxPositiveNaNBits = UINT32_C(0x7FFFFFFF);
  for (uint32_t block = UINT32_C(0x7F800001); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the final partial block stays inside the positive NaN range.
      inputs[offset] = uint32_as_float(std::min<uint32_t>(kMaxPositiveNaNBits, block + offset));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      ASSERT_TRUE(std::isnan(outputs[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[offset]);
    }
  }
}
1791 
// Every negative NaN input must map to a NaN output.
TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep all NaN payloads, then set the sign bit to form negative NaNs.
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Fix: clamp the payload to 0x7FFFFFFF *before* setting the sign bit.
      // The previous order, min(0x7FFFFFFF, 0x80000000 | (n + i)), always
      // produced 0x7FFFFFFF (one positive NaN), so negative NaNs were never
      // actually exercised by this test.
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1807