// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;

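// Every expm1minus kernel variant below is exercised with the same four
// special-case checks: negative_zero, negative_saturation, positive_nan,
// and negative_nan. Inputs are synthesized in kBlockSize chunks directly
// from their IEEE 754 bit patterns via uint32_as_float, and failure
// messages report the offending values as bit patterns.
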
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
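// All kBlockSize lanes are filled with -0.0f, but only outputs[0] is
// checked: the result must compare equal to 0.0f (ASSERT_EQ uses
// operator==, under which -0.0f == 0.0f, so either signed zero passes).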
TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_zero) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

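// Sweeps bit patterns from 0xC18AA123 (about -17.33f) down to -infinity
// (0xFF800000), clamping the final block at -infinity; for negative floats,
// a larger unsigned pattern encodes a more negative value. Every result
// must saturate to exactly -1.0f.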
TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_saturation) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

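// Sweeps all positive NaN bit patterns (0x7F800001 through 0x7FFFFFFF); the
// std::min clamp keeps n + i from wrapping past 0x7FFFFFFF into 0x80000000,
// the bit pattern of -0.0f. Every output must be NaN.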
TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, positive_nan) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

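// Same sweep with the sign bit set after clamping, covering the negative
// NaN bit patterns 0xFF800001 through 0xFFFFFFFF. Every output must again
// be NaN.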
TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_nan) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(EXPM1MINUS__NEON_RR2_P6, negative_zero) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__NEON_RR2_P6, negative_saturation) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__NEON_RR2_P6, positive_nan) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__NEON_RR2_P6, negative_nan) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_saturation) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, positive_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_saturation) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__NEONFMA_RR1_P6, positive_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_zero) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_saturation) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, positive_nan) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_nan) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_zero) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_saturation) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX512F_RR1_P6, positive_nan) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_nan) {
  TEST_REQUIRES_X86_AVX512F;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_zero) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_saturation) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, positive_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_zero) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_saturation) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, positive_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_zero) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_saturation) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, positive_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX2_RR1_P6, negative_zero) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX2_RR1_P6, negative_saturation) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX2_RR1_P6, positive_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX2_RR1_P6, negative_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_zero) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_saturation) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, positive_nan) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_nan) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_zero) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_saturation) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, positive_nan) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_nan) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__AVX_RR2_P6, negative_zero) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__AVX_RR2_P6, negative_saturation) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX_RR2_P6, positive_nan) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__AVX_RR2_P6, negative_nan) {
  TEST_REQUIRES_X86_AVX;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
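// The SSE2 variants run without a TEST_REQUIRES_* check, presumably because
// SSE2 is treated as the baseline ISA under the x86 guard.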
TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
1031 }
1032 xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1033 for (uint32_t i = 0; i < kBlockSize; i++) {
1034 ASSERT_TRUE(std::isnan(outputs[i]))
1035 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1036 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1037 }
1038 }
1039 }
1040 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1041
1042
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPM1MINUS__SSE2_RR2_P6, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SSE2_RR2_P6, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SSE2_RR2_P6, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SSE2_RR2_P6, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


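// The WASM SIMD kernels come in two flavors per polynomial; judging by the
// suffixes, _andnot masks saturated lanes out with an andnot of the comparison
// mask, while _max clamps the input against the saturation cutoff with
// f32x4.max. Both must satisfy the same edge-case contract.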
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


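// The scalar variants need no architecture guard or ISA check. Per the naming
// scheme, they differ in lookup-table size (lut4/lut8/lut16, or none) and in
// the degree of the polynomial approximation (p3..p6).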
TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_P5, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_P6, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
