// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;
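// Each test below sweeps a contiguous range of IEEE-754 binary32 bit patterns
// in blocks of kBlockSize values, reinterprets every pattern as a float, runs
// the kernel under test on the whole block, and compares each output
// bit-for-bit against std::nearbyint(), which rounds to nearest-even under the
// default rounding mode. The kernel variants are assumed to differ only in how
// they implement the rounding (magic-constant add/subtract, integer
// conversion, or a dedicated rounding instruction); all of them must agree
// with the same reference.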

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
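  // positive_normal / negative_normal: bit patterns 0x00000000-0x4B7FFFFF and
  // 0x80000000-0xCB7FFFFF cover every finite value with magnitude below 2**24,
  // i.e. all inputs for which rounding to an integer can actually change the
  // value.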
  TEST(ROUNDNE__SSE_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

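  // positive_integral / negative_integral: bit patterns from 0x4B800000 (2**24)
  // up to the infinity encodings cover only values that are already integral,
  // so round-to-nearest-even must return them unchanged.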
  TEST(ROUNDNE__SSE_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

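  // positive_infinity / negative_infinity: infinities must pass through the
  // kernel unchanged, exactly as std::nearbyint() returns them.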
  TEST(ROUNDNE__SSE_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

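  // positive_qnan / negative_qnan: bit patterns 0x7FC00000-0x7FFFFFFF (and
  // their sign-flipped counterparts) are quiet NaNs; the kernel output must
  // match std::nearbyint() bit-for-bit, so NaN payloads are expected to be
  // preserved.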
  TEST(ROUNDNE__SSE_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

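  // positive_snan / negative_snan: bit patterns 0x7F800001-0x7FBFFFFF (and
  // their sign-flipped counterparts) are signaling NaNs; std::max() keeps the
  // sweep from touching the infinity encoding 0x7F800000. The comparison masks
  // out bit 22 (0xFFBFFFFF keeps everything except the quiet bit), because the
  // kernel is allowed to quiet a signaling NaN.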
  TEST(ROUNDNE__SSE_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

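  // positive_snan_to_qnan / negative_snan_to_qnan: the same signaling-NaN
  // sweep, but compared without masking, so the kernel must quiet (or not
  // quiet) signaling NaNs exactly the way std::nearbyint() does.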
  TEST(ROUNDNE__SSE_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(ROUNDNE__SSE2_CVT, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__SSE2_CVT, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__SSE2_CVT, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

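  // The snan_to_qnan checks are disabled for the SSE2 conversion-based kernel:
  // presumably its NaN results do not set the quiet bit the same way
  // std::nearbyint() does, while the masked snan tests above still pass.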
  TEST(ROUNDNE__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
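  // Note: judging by its name, the SSE4.1 kernel presumably rounds with the
  // dedicated ROUNDPS instruction (_mm_round_ps with
  // _MM_FROUND_TO_NEAREST_INT); unlike the SSE2 conversion kernel, its
  // snan_to_qnan checks below are enabled.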
  TEST(ROUNDNE__SSE41, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__SSE41, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__SSE41, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE41, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(ROUNDNE__NEON_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__NEON_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__NEON_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEON_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
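  // Note: judging by its name, the NEONv8 kernel presumably uses the ARMv8
  // round-to-nearest instruction (VRINTN/FRINTN) rather than the add/subtract
  // trick of the kernel above.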
  TEST(ROUNDNE__NEONV8, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEONV8, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEONV8, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEONV8, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__NEONV8, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__NEONV8, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
938     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
939     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
940     xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
941     const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
942     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
943       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
944       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
945       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
946   }
947 
TEST(ROUNDNE__NEONV8,positive_qnan)948   TEST(ROUNDNE__NEONV8, positive_qnan) {
949     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
950     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
951     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
952       for (uint32_t i = 0; i < kBlockSize; i++) {
953         inputs[i] = uint32_as_float(n + i);
954       }
955       xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
956       for (uint32_t i = 0; i < kBlockSize; i++) {
957         const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
958         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
959           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
960           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
961           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
962       }
963     }
964   }
965 
TEST(ROUNDNE__NEONV8,negative_qnan)966   TEST(ROUNDNE__NEONV8, negative_qnan) {
967     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
968     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
969     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
970       for (uint32_t i = 0; i < kBlockSize; i++) {
971         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
972       }
973       xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
974       for (uint32_t i = 0; i < kBlockSize; i++) {
975         const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
976         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
977           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
978           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
979           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
980       }
981     }
982   }
983 
TEST(ROUNDNE__NEONV8,positive_snan)984   TEST(ROUNDNE__NEONV8, positive_snan) {
985     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
986     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
987     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
988       for (uint32_t i = 0; i < kBlockSize; i++) {
989         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
990       }
991       xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
992       for (uint32_t i = 0; i < kBlockSize; i++) {
993         const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
994         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
995           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
996           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
997           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
998       }
999     }
1000   }
1001 
TEST(ROUNDNE__NEONV8,negative_snan)1002   TEST(ROUNDNE__NEONV8, negative_snan) {
1003     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1004     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1005     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1006       for (uint32_t i = 0; i < kBlockSize; i++) {
1007         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1008       }
1009       xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1010       for (uint32_t i = 0; i < kBlockSize; i++) {
1011         const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
1012         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1013           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1014           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1015           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1016       }
1017     }
1018   }
1019 
TEST(ROUNDNE__NEONV8,positive_snan_to_qnan)1020   TEST(ROUNDNE__NEONV8, positive_snan_to_qnan) {
1021     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1022     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1023     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1024       for (uint32_t i = 0; i < kBlockSize; i++) {
1025         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1026       }
1027       xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1028       for (uint32_t i = 0; i < kBlockSize; i++) {
1029         const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
1030         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1031           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1032           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1033           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1034       }
1035     }
1036   }
1037 
TEST(ROUNDNE__NEONV8,negative_snan_to_qnan)1038   TEST(ROUNDNE__NEONV8, negative_snan_to_qnan) {
1039     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1040     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1041     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1042       for (uint32_t i = 0; i < kBlockSize; i++) {
1043         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1044       }
1045       xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1046       for (uint32_t i = 0; i < kBlockSize; i++) {
1047         const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
1048         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1049           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1050           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1051           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1052       }
1053     }
1054   }
1055 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1056 
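// The WAsm SIMD add-sub variant rounds without a dedicated instruction, by adding and then
// subtracting a large power-of-two magic constant (2**23 in the usual formulation), so the NaN
// and infinity cases below are what guard against the trick mishandling non-finite inputs.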
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

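// The "native" variant is expected to map to WebAssembly's f32x4.nearest instruction; the same
// exhaustive sweeps confirm it matches std::nearbyint bit-for-bit, except that signaling NaNs
// only have to agree modulo the quiet bit (the 0xFFBFFFFF mask in the *_snan tests).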
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  TEST(ROUNDNE__WASMSIMD_NATIVE, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__WASMSIMD_NATIVE, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

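// The scalar add-sub variant is not guarded by an architecture check, so these tests run on every
// platform and double as a sanity check for the magic-constant rounding trick itself.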
TEST(ROUNDNE__SCALAR_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(ROUNDNE__SCALAR_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(ROUNDNE__SCALAR_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}